Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4a7aa7c3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4a7aa7c3
编写于
8月 08, 2022
作者:
T
Thomas Young
提交者:
GitHub
8月 08, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move lamb_op to phi (#44899)
上级
8537edaa
变更
21
显示空白变更内容
内联
并排
Showing
21 changed file
with
1551 addition
and
173 deletion
+1551
-173
paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
...auto_code_generator/final_state_generator/python_c_gen.py
+2
-0
paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu
...id/operators/optimizers/distributed_fused_lamb_init_op.cu
+4
-1
paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu
...e/fluid/operators/optimizers/distributed_fused_lamb_op.cu
+3
-1
paddle/fluid/operators/optimizers/lamb_op.cc
paddle/fluid/operators/optimizers/lamb_op.cc
+18
-127
paddle/fluid/operators/optimizers/lamb_op.cu
paddle/fluid/operators/optimizers/lamb_op.cu
+0
-22
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
+2
-2
paddle/phi/api/yaml/legacy_api.yaml
paddle/phi/api/yaml/legacy_api.yaml
+12
-0
paddle/phi/infermeta/multiary.cc
paddle/phi/infermeta/multiary.cc
+99
-0
paddle/phi/infermeta/multiary.h
paddle/phi/infermeta/multiary.h
+21
-0
paddle/phi/kernels/cpu/lamb_kernel.cc
paddle/phi/kernels/cpu/lamb_kernel.cc
+20
-0
paddle/phi/kernels/funcs/lamb_functors.h
paddle/phi/kernels/funcs/lamb_functors.h
+463
-0
paddle/phi/kernels/funcs/tensor_to_string.h
paddle/phi/kernels/funcs/tensor_to_string.h
+18
-19
paddle/phi/kernels/gpu/lamb_kernel.cu
paddle/phi/kernels/gpu/lamb_kernel.cu
+30
-0
paddle/phi/kernels/impl/lamb_kernel_impl.h
paddle/phi/kernels/impl/lamb_kernel_impl.h
+296
-0
paddle/phi/kernels/lamb_kernel.h
paddle/phi/kernels/lamb_kernel.h
+44
-0
paddle/phi/kernels/selected_rows/cpu/lamb_kernel.cc
paddle/phi/kernels/selected_rows/cpu/lamb_kernel.cc
+21
-0
paddle/phi/kernels/selected_rows/gpu/lamb_kernel.cu
paddle/phi/kernels/selected_rows/gpu/lamb_kernel.cu
+30
-0
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+351
-0
paddle/phi/kernels/selected_rows/lamb_kernel.h
paddle/phi/kernels/selected_rows/lamb_kernel.h
+46
-0
paddle/phi/ops/compat/lamb_sig.cc
paddle/phi/ops/compat/lamb_sig.cc
+62
-0
python/paddle/optimizer/lamb.py
python/paddle/optimizer/lamb.py
+9
-1
未找到文件。
paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
浏览文件 @
4a7aa7c3
...
...
@@ -88,6 +88,8 @@ no_amp_list = [
'rmsprop'
,
'sgd_'
,
'sgd'
,
'lamb_'
,
'lamb'
,
'assign_value_'
,
'sparse_momentum_'
,
'sparse_momentum'
,
...
...
paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu
浏览文件 @
4a7aa7c3
...
...
@@ -15,15 +15,18 @@
#include "paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/optimizers/cast_with_ptr.h"
#include "paddle/fluid/operators/tensor_to_string.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
namespace
paddle
{
namespace
operators
{
// Pull the tensor-to-string helpers into this namespace so existing
// unqualified call sites keep resolving after the helpers moved under
// phi::funcs.
using phi::funcs::FlattenToString;
using phi::funcs::ToVector;
struct
ParamGradInfo
{
framework
::
Tensor
*
param_t
{
nullptr
};
framework
::
Tensor
*
grad_t
{
nullptr
};
...
...
paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu
浏览文件 @
4a7aa7c3
...
...
@@ -19,12 +19,12 @@
#include "paddle/fluid/operators/optimizers/cast_with_ptr.h"
#include "paddle/fluid/operators/optimizers/distributed_fused_lamb_op.h"
#include "paddle/fluid/operators/optimizers/multi_tensor_apply.h"
#include "paddle/fluid/operators/tensor_to_string.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/aligned_vector.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
#ifdef __NVCC__
#include "cub/cub.cuh"
...
...
@@ -43,6 +43,8 @@ namespace operators {
// MasterT<T> is shorthand for the type that details::MPTypeTrait maps T to.
template <typename T>
using MasterT = typename details::MPTypeTrait<T>::Type;

// Make the phi::funcs tensor-to-string helpers callable without
// qualification inside this namespace.
using phi::funcs::FlattenToString;
using phi::funcs::ToVector;
template
<
typename
T
>
static
void
FillZeroWithPtr
(
T
*
x
,
size_t
n
,
gpuStream_t
stream
)
{
...
...
paddle/fluid/operators/optimizers/lamb_op.cc
浏览文件 @
4a7aa7c3
/* Copyright (c) 20
19
PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 20
22
PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
...
...
@@ -12,11 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/lamb_op.h"
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/kernels/lamb_kernel.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -25,125 +29,6 @@ class LambOp : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Param) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Grad"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Grad) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Moment1"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Moment1) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Moment2"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Moment2) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"LearningRate"
),
true
,
platform
::
errors
::
NotFound
(
"Input(LearningRate) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Beta1Pow"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Beta1Pow) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Beta2Pow"
),
true
,
platform
::
errors
::
NotFound
(
"Input(Beta2Pow) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(ParamOut) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Moment1Out"
),
true
,
platform
::
errors
::
NotFound
(
"Output(Moment1Out) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Moment2Out"
),
true
,
platform
::
errors
::
NotFound
(
"Output(Moment2Out) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Beta1PowOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(Beta1PowOut) of LambOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Beta2PowOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(Beta2PowOut) of LambOp should not be null."
));
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_NE
(
phi
::
product
(
lr_dims
),
0
,
platform
::
errors
::
InvalidArgument
(
"The number of LearningRate shall not be 0, but received %d. Maybe "
"the Input variable LearningRate has not "
"been initialized. You may need to confirm "
"if you put exe.run(startup_program) "
"after optimizer.minimize function."
,
phi
::
product
(
lr_dims
)));
PADDLE_ENFORCE_EQ
(
phi
::
product
(
lr_dims
),
1
,
platform
::
errors
::
InvalidArgument
(
"Learning rate should have 1 dimension, but received %d."
,
phi
::
product
(
lr_dims
)));
auto
beta1_pow_dims
=
ctx
->
GetInputDim
(
"Beta1Pow"
);
PADDLE_ENFORCE_GE
(
phi
::
product
(
beta1_pow_dims
),
1
,
platform
::
errors
::
InvalidArgument
(
"The size of Beta1 power accumulator should be "
"greater than 0, but received %d."
,
phi
::
product
(
beta1_pow_dims
)));
auto
beta2_pow_dims
=
ctx
->
GetInputDim
(
"Beta2Pow"
);
PADDLE_ENFORCE_GE
(
phi
::
product
(
beta2_pow_dims
),
1
,
platform
::
errors
::
InvalidArgument
(
"The size of Beta2 power accumulator should be "
"greater than 0, but received %d."
,
phi
::
product
(
beta2_pow_dims
)));
auto
param_dims
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Grad"
),
platform
::
errors
::
InvalidArgument
(
"Param and Grad input of LambOp should have same dimension. But "
"received Param dims: [%s], Grad dims: [%s]."
,
param_dims
,
ctx
->
GetInputDim
(
"Grad"
)));
}
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Moment1"
),
platform
::
errors
::
InvalidArgument
(
"Param and Moment1 input of LambOp should have same dimension. But "
"received Param dims: [%s], Moment1 dims: [%s]."
,
param_dims
,
ctx
->
GetInputDim
(
"Moment1"
)));
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Moment2"
),
platform
::
errors
::
InvalidArgument
(
"Param and Moment2 input of LambOp should have same dimension. But "
"received Param dims: [%s], Moment2 dims: [%s]."
,
param_dims
,
ctx
->
GetInputDim
(
"Moment2"
)));
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment1Out"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment2Out"
,
param_dims
);
ctx
->
SetOutputDim
(
"Beta1PowOut"
,
beta1_pow_dims
);
ctx
->
SetOutputDim
(
"Beta2PowOut"
,
beta2_pow_dims
);
}
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
input_data_type
=
...
...
@@ -246,10 +131,16 @@ learning rate, $\lambda$ the weight decay rate.
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
lamb
,
ops
::
LambOp
,
ops
::
LambOpMaker
);
REGISTER_OP_CPU_KERNEL
(
lamb
,
ops
::
LambOpKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
LambOpKernel
<
phi
::
CPUContext
,
double
>
);
DECLARE_INFER_SHAPE_FUNCTOR
(
lamb
,
LambInferMetaFunctor
,
PD_INFER_META
(
phi
::
LambInferMeta
));
REGISTER_OPERATOR
(
lamb
,
ops
::
LambOp
,
ops
::
LambOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
,
LambInferMetaFunctor
);
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION
(
lamb
).
AddCheckpoint
(
...
...
paddle/fluid/operators/optimizers/lamb_op.cu
已删除
100644 → 0
浏览文件 @
8537edaa
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/lamb_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
lamb
,
ops
::
LambOpKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
,
ops
::
LambOpKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
LambOpKernel
<
phi
::
GPUContext
,
double
>
);
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
浏览文件 @
4a7aa7c3
/* Copyright (c) 20
16
PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 20
22
PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "paddle/fluid/
operators/optimizers/lamb_op
.h"
#include "paddle/fluid/
framework/op_registry
.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace
paddle
{
...
...
paddle/phi/api/yaml/legacy_api.yaml
浏览文件 @
4a7aa7c3
...
...
@@ -1327,6 +1327,18 @@
optional
:
prior_dist
backward
:
label_smooth_grad
# lamb_ : in-place LAMB optimizer update.
# Two kernels are listed: `lamb` takes a dense grad, `lamb_sr` takes a
# selected_rows grad. master_param and skip_update are optional inputs.
- api : lamb_
  args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, float weight_decay, float beta1, float beta2, float epsilon, bool multi_precision)
  output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs)
  infer_meta :
    func : LambInferMeta
  kernel :
    func : lamb {dense, dense, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense},
           lamb_sr {dense, selected_rows, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense}
    data_type : param
  optional : master_param, skip_update
  inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_outs)
-
api
:
layer_norm
args
:
(Tensor x, Tensor scale, Tensor bias, float epsilon, int begin_norm_axis, bool is_test)
output
:
Tensor(out), Tensor(mean), Tensor(variance)
...
...
paddle/phi/infermeta/multiary.cc
浏览文件 @
4a7aa7c3
...
...
@@ -1642,6 +1642,105 @@ void InterpolateInferMeta(
}
}
// Infers the output metadata (dims and dtype) for the lamb optimizer op.
//
// Validation performed, in order:
//   * learning_rate must hold exactly one element (a zero-element tensor
//     gets a dedicated "not initialized" diagnostic first);
//   * beta1_pow and beta2_pow must each hold at least one element;
//   * moment1 and moment2 must have the same dims as param;
//   * param_out, moment1_out, moment2_out, beta1_pow_out and beta2_pow_out
//     must be non-null.
//
// On success, param_out / moment1_out / moment2_out receive param's dims
// and the dtype of their matching input; beta1_pow_out / beta2_pow_out
// mirror the dims and dtype of beta1_pow / beta2_pow.
//
// grad, master_param, skip_update, weight_decay, beta1, beta2, epsilon and
// multi_precision are accepted for signature compatibility but are not
// read by this function.
//
// NOTE(review): grad's dims are not compared against param here --
// presumably because grad may be a SelectedRows; confirm.
// NOTE(review): master_param_outs is neither null-checked nor assigned any
// metadata here -- confirm whether the multi_precision path relies on it.
void LambInferMeta(const MetaTensor& param,
                   const MetaTensor& grad,
                   const MetaTensor& learning_rate,
                   const MetaTensor& moment1,
                   const MetaTensor& moment2,
                   const MetaTensor& beta1_pow,
                   const MetaTensor& beta2_pow,
                   const MetaTensor& master_param,
                   const MetaTensor& skip_update,
                   float weight_decay,
                   float beta1,
                   float beta2,
                   float epsilon,
                   bool multi_precision,
                   MetaTensor* param_out,
                   MetaTensor* moment1_out,
                   MetaTensor* moment2_out,
                   MetaTensor* beta1_pow_out,
                   MetaTensor* beta2_pow_out,
                   MetaTensor* master_param_outs) {
  // The learning rate is checked by element count, not by rank.
  auto lr_dims = learning_rate.dims();
  PADDLE_ENFORCE_NE(
      phi::product(lr_dims),
      0,
      phi::errors::InvalidArgument(
          "The number of LearningRate shall not be 0, but received %d. Maybe "
          "the Input variable LearningRate has not "
          "been initialized. You may need to confirm "
          "if you put exe.run(startup_program) "
          "after optimizer.minimize function.",
          phi::product(lr_dims)));
  PADDLE_ENFORCE_EQ(
      phi::product(lr_dims),
      1,
      phi::errors::InvalidArgument(
          "Learning rate should have 1 dimension, but received %d.",
          phi::product(lr_dims)));

  auto beta1_pow_dims = beta1_pow.dims();
  PADDLE_ENFORCE_GE(phi::product(beta1_pow_dims),
                    1,
                    phi::errors::InvalidArgument(
                        "The size of Beta1 power accumulator should be "
                        "greater than 0, but received %d.",
                        phi::product(beta1_pow_dims)));

  auto beta2_pow_dims = beta2_pow.dims();
  PADDLE_ENFORCE_GE(phi::product(beta2_pow_dims),
                    1,
                    phi::errors::InvalidArgument(
                        "The size of Beta2 power accumulator should be "
                        "greater than 0, but received %d.",
                        phi::product(beta2_pow_dims)));

  // Both moment buffers must match the parameter's dims exactly.
  auto param_dims = param.dims();
  PADDLE_ENFORCE_EQ(
      param_dims,
      moment1.dims(),
      phi::errors::InvalidArgument(
          "Param and Moment1 input of LambOp should have same dimension. But "
          "received Param dims: [%s], Moment1 dims: [%s].",
          param_dims,
          moment1.dims()));
  // Fixed: this message previously named "AdamOp" (copy-paste slip).
  PADDLE_ENFORCE_EQ(
      param_dims,
      moment2.dims(),
      phi::errors::InvalidArgument(
          "Param and Moment2 input of LambOp should have same dimension. But "
          "received Param dims: [%s], Moment2 dims: [%s].",
          param_dims,
          moment2.dims()));

  // Required outputs must be present before any metadata is written.
  PADDLE_ENFORCE_NOT_NULL(
      param_out,
      phi::errors::NotFound("The output param_out can not be nullptr"));
  PADDLE_ENFORCE_NOT_NULL(
      moment1_out,
      phi::errors::NotFound("The output moment1_out can not be nullptr"));
  PADDLE_ENFORCE_NOT_NULL(
      moment2_out,
      phi::errors::NotFound("The output moment2_out can not be nullptr"));
  PADDLE_ENFORCE_NOT_NULL(
      beta1_pow_out,
      phi::errors::NotFound("The output beta1_pow_out can not be nullptr"));
  PADDLE_ENFORCE_NOT_NULL(
      beta2_pow_out,
      phi::errors::NotFound("The output beta2_pow_out can not be nullptr"));

  param_out->set_dims(param_dims);
  param_out->set_dtype(param.dtype());

  // Moment outputs take param's dims but keep their own input's dtype.
  moment1_out->set_dims(param_dims);
  moment1_out->set_dtype(moment1.dtype());
  moment2_out->set_dims(param_dims);
  moment2_out->set_dtype(moment2.dtype());

  beta1_pow_out->set_dims(beta1_pow_dims);
  beta1_pow_out->set_dtype(beta1_pow.dtype());
  beta2_pow_out->set_dims(beta2_pow_dims);
  beta2_pow_out->set_dtype(beta2_pow.dtype());
}
void
LogspaceInferMeta
(
const
MetaTensor
&
start
,
const
MetaTensor
&
stop
,
const
MetaTensor
&
number
,
...
...
paddle/phi/infermeta/multiary.h
浏览文件 @
4a7aa7c3
...
...
@@ -269,6 +269,27 @@ void InterpolateInferMeta(
MetaTensor
*
output
,
MetaConfig
config
=
MetaConfig
());
// Metadata inference entry point for the lamb optimizer op.
// Input tensors are passed by const reference, the scalar hyper-parameters
// by value, and the output metadata through pointers.
void LambInferMeta(const MetaTensor& param,
                   const MetaTensor& grad,
                   const MetaTensor& learning_rate,
                   const MetaTensor& moment1,
                   const MetaTensor& moment2,
                   const MetaTensor& beta1_pow,
                   const MetaTensor& beta2_pow,
                   const MetaTensor& master_param,
                   const MetaTensor& skip_update,
                   float weight_decay,
                   float beta1,
                   float beta2,
                   float epsilon,
                   bool multi_precision,
                   MetaTensor* param_out,
                   MetaTensor* moment1_out,
                   MetaTensor* moment2_out,
                   MetaTensor* beta1_pow_out,
                   MetaTensor* beta2_pow_out,
                   MetaTensor* master_param_outs);
void
LogspaceInferMeta
(
const
MetaTensor
&
start
,
const
MetaTensor
&
stop
,
const
MetaTensor
&
number
,
...
...
paddle/phi/kernels/cpu/lamb_kernel.cc
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/lamb_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/lamb_kernel_impl.h"
// Register phi::LambKernel under the name "lamb" for the CPU backend with
// ALL_LAYOUT, for element types float and double. The trailing body is
// intentionally empty: no extra per-kernel setup is done here.
PD_REGISTER_KERNEL(
    lamb, CPU, ALL_LAYOUT, phi::LambKernel, float, double) {}
paddle/
fluid/operators/optimizers/lamb_op
.h
→
paddle/
phi/kernels/funcs/lamb_functors
.h
浏览文件 @
4a7aa7c3
/* Copyright (c) 20
19
PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 20
22
PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
...
...
@@ -18,25 +18,26 @@ limitations under the License. */
#include <Eigen/Dense>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/buffer.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/tensor_to_string.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/squared_l2_norm.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
namespace
paddle
{
namespace
operators
{
namespace
phi
{
namespace
scatter
=
paddle
::
operators
::
math
::
scatter
;
template
<
typename
T
,
bool
IsMultiPrecision
>
struct
LambMomentREGUpdateFunctor
{
using
MT
=
typename
std
::
conditional
<
IsMultiPrecision
,
typename
details
::
MPTypeTrait
<
T
>::
Type
,
using
MT
=
typename
std
::
conditional
<
IsMultiPrecision
,
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
,
T
>::
type
;
MT
weight_decay_
;
...
...
@@ -112,8 +113,9 @@ struct LambMomentREGUpdateFunctor {
template
<
typename
T
,
bool
IsMultiPrecision
>
struct
LambMomentMENUpdateFunctor
{
using
MT
=
typename
std
::
conditional
<
IsMultiPrecision
,
typename
details
::
MPTypeTrait
<
T
>::
Type
,
using
MT
=
typename
std
::
conditional
<
IsMultiPrecision
,
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
,
T
>::
type
;
MT
weight_decay_
;
...
...
@@ -458,356 +460,4 @@ struct LambParamUpateFunctor
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
LambOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
MT
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
bool
multi_precision
=
ctx
.
Attr
<
bool
>
(
"multi_precision"
);
if
(
multi_precision
)
{
ComputeImpl
<
MT
,
true
>
(
ctx
);
}
else
{
ComputeImpl
<
T
,
false
>
(
ctx
);
}
}
private:
template
<
typename
MT
,
bool
IsMultiPrecision
>
void
ComputeImpl
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
if
(
!
IsMultiPrecision
)
{
constexpr
auto
kIsSameType
=
std
::
is_same
<
T
,
MT
>::
value
;
PADDLE_ENFORCE_EQ
(
kIsSameType
,
true
,
platform
::
errors
::
InvalidArgument
(
"When multi_precision=False, T and MT must be the same type."
));
}
const
auto
*
skip_update
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"SkipUpdate"
);
const
bool
*
skip_update_flag
=
skip_update
&&
skip_update
->
IsInitialized
()
?
skip_update
->
data
<
bool
>
()
:
nullptr
;
if
(
skip_update_flag
&&
platform
::
is_cpu_place
(
skip_update
->
place
())
&&
(
*
skip_update_flag
))
{
return
;
}
auto
weight_decay
=
static_cast
<
MT
>
(
ctx
.
Attr
<
float
>
(
"weight_decay"
));
auto
beta1
=
static_cast
<
MT
>
(
ctx
.
Attr
<
float
>
(
"beta1"
));
auto
beta2
=
static_cast
<
MT
>
(
ctx
.
Attr
<
float
>
(
"beta2"
));
auto
epsilon
=
static_cast
<
MT
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
const
auto
&
param
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Param"
),
"Input"
,
"Param"
,
"Lamb"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
&
mom1
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Moment1"
),
"Input"
,
"Moment1"
,
"Lamb"
);
const
auto
&
mom2
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Moment2"
),
"Input"
,
"Moment2"
,
"Lamb"
);
const
auto
&
lr
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"LearningRate"
),
"Input"
,
"LearningRate"
,
"Lamb"
);
const
auto
&
beta1_pow
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Beta1Pow"
),
"Input"
,
"Beta1Pow"
,
"Lamb"
);
const
auto
&
beta2_pow
=
GET_DATA_SAFELY
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Beta2Pow"
),
"Input"
,
"Beta2Pow"
,
"Lamb"
);
auto
&
param_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
framework
::
LoDTensor
>
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"Lamb"
);
auto
&
mom1_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Moment1Out"
),
"Output"
,
"Moment1Out"
,
"Lamb"
);
auto
&
mom2_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Moment2Out"
),
"Output"
,
"Moment2Out"
,
"Lamb"
);
auto
&
beta1_pow_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Beta1PowOut"
),
"Output"
,
"Beta1PowOut"
,
"Lamb"
);
auto
&
beta2_pow_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Beta2PowOut"
),
"Output"
,
"Beta2PowOut"
,
"Lamb"
);
const
auto
*
master_param
=
IsMultiPrecision
?
ctx
.
Input
<
framework
::
LoDTensor
>
(
"MasterParam"
)
:
nullptr
;
auto
*
master_param_out
=
IsMultiPrecision
?
ctx
.
Output
<
framework
::
LoDTensor
>
(
"MasterParamOut"
)
:
nullptr
;
if
(
IsMultiPrecision
)
{
PADDLE_ENFORCE_NOT_NULL
(
master_param
,
platform
::
errors
::
InvalidArgument
(
"Input(MasterParam) must be provided when "
"multi_precision=True."
));
PADDLE_ENFORCE_NOT_NULL
(
master_param_out
,
platform
::
errors
::
InvalidArgument
(
"Output(MasterParamOut) must be provided "
"when multi_precision=True."
));
}
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
numel
=
param
.
numel
();
platform
::
ForRange
<
DeviceContext
>
for_range
(
dev_ctx
,
numel
);
auto
trust_ratio_div
=
ctx
.
AllocateTmpTensor
<
MT
,
DeviceContext
>
(
param
.
dims
(),
dev_ctx
);
auto
*
trust_ratio_div_ptr
=
trust_ratio_div
.
template
data
<
MT
>();
const
void
*
param_ptr
=
param
.
data
();
const
void
*
master_param_ptr
=
master_param
?
master_param
->
data
()
:
nullptr
;
void
*
param_out_ptr
=
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
void
*
master_param_out_ptr
=
master_param_out
?
master_param_out
->
template
mutable_data
<
MT
>(
ctx
.
GetPlace
())
:
nullptr
;
// Update moments
bool
should_update_beta_pow_later
=
false
;
const
MT
*
beta1_pow_ptr
=
nullptr
,
*
beta2_pow_ptr
=
nullptr
;
MT
*
beta1_pow_out_ptr
=
nullptr
,
*
beta2_pow_out_ptr
=
nullptr
;
VLOG
(
10
)
<<
"Beta1Pow place: "
<<
beta1_pow
.
place
()
<<
" , Beta2Pow place: "
<<
beta2_pow
.
place
();
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
&
grad
=
grad_var
->
Get
<
framework
::
LoDTensor
>
();
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
())
&&
beta1_pow
.
place
()
==
platform
::
CPUPlace
()
&&
beta2_pow
.
place
()
==
platform
::
CPUPlace
())
{
LambMomentREGUpdateFunctor
<
T
,
IsMultiPrecision
>
moment_update_functor
(
weight_decay
,
beta1
,
beta2
,
epsilon
,
*
beta1_pow
.
template
data
<
MT
>(),
*
beta2_pow
.
template
data
<
MT
>(),
mom1
.
template
data
<
MT
>(),
mom1_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
()),
mom2
.
template
data
<
MT
>(),
mom2_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
()),
grad
.
template
data
<
T
>(),
static_cast
<
const
MT
*>
(
IsMultiPrecision
?
master_param_ptr
:
param_ptr
),
trust_ratio_div_ptr
,
skip_update_flag
);
for_range
(
moment_update_functor
);
beta1_pow_out
.
template
mutable_data
<
MT
>(
platform
::
CPUPlace
())[
0
]
=
beta1
*
beta1_pow
.
template
data
<
MT
>()[
0
];
beta2_pow_out
.
template
mutable_data
<
MT
>(
platform
::
CPUPlace
())[
0
]
=
beta2
*
beta2_pow
.
template
data
<
MT
>()[
0
];
}
else
{
beta1_pow_ptr
=
beta1_pow
.
template
data
<
MT
>();
beta2_pow_ptr
=
beta2_pow
.
template
data
<
MT
>();
beta1_pow_out_ptr
=
beta1_pow_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
());
beta2_pow_out_ptr
=
beta2_pow_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
());
should_update_beta_pow_later
=
true
;
LambMomentMENUpdateFunctor
<
T
,
IsMultiPrecision
>
moment_update_functor
(
weight_decay
,
beta1
,
beta2
,
epsilon
,
static_cast
<
const
MT
*>
(
beta1_pow_ptr
),
static_cast
<
const
MT
*>
(
beta2_pow_ptr
),
mom1
.
template
data
<
MT
>(),
mom1_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
()),
mom2
.
template
data
<
MT
>(),
mom2_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
()),
grad
.
template
data
<
T
>(),
static_cast
<
const
MT
*>
(
IsMultiPrecision
?
master_param_ptr
:
param_ptr
),
trust_ratio_div_ptr
,
skip_update_flag
);
for_range
(
moment_update_functor
);
}
}
else
if
(
grad_var
->
IsType
<
phi
::
SelectedRows
>
())
{
PADDLE_ENFORCE_EQ
(
IsMultiPrecision
,
false
,
platform
::
errors
::
Unimplemented
(
"SelectedRows gradient is not supported when "
"multi_precision=True."
));
constexpr
bool
kIsSameType
=
std
::
is_same
<
T
,
MT
>::
value
;
PADDLE_ENFORCE_EQ
(
kIsSameType
,
true
,
platform
::
errors
::
Unimplemented
(
"SelectedRows gradient is not supported when "
"multi_precision=True."
));
auto
&
grad
=
GET_DATA_SAFELY
(
ctx
.
Input
<
phi
::
SelectedRows
>
(
"Grad"
),
"Input"
,
"Grad"
,
"Lamb"
);
if
(
grad
.
rows
().
size
()
==
0
)
{
VLOG
(
3
)
<<
"grad row size is 0!!"
;
return
;
}
std
::
vector
<
int64_t
>
cpu_rows
(
grad
.
rows
().
begin
(),
grad
.
rows
().
end
());
bool
is_strict_sorted
=
true
;
for
(
size_t
i
=
1
;
i
<
cpu_rows
.
size
();
++
i
)
{
if
(
cpu_rows
[
i
-
1
]
>=
cpu_rows
[
i
])
{
is_strict_sorted
=
false
;
break
;
}
}
phi
::
SelectedRows
tmp_grad_merge
;
const
phi
::
SelectedRows
*
grad_merge_ptr
;
if
(
is_strict_sorted
)
{
grad_merge_ptr
=
&
grad
;
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
auto
&
grad_merge
=
*
grad_merge_ptr
;
auto
&
grad_tensor
=
grad_merge
.
value
();
const
T
*
grad_data
=
grad_tensor
.
template
data
<
T
>();
auto
*
grad_merge_rows
=
&
grad_merge
.
rows
();
paddle
::
framework
::
MixVector
<
int64_t
>
mixv_grad_merge_rows
(
grad_merge_rows
);
const
int64_t
*
rows
=
mixv_grad_merge_rows
.
Data
(
ctx
.
GetPlace
());
auto
row_numel
=
grad_tensor
.
numel
()
/
grad_merge
.
rows
().
size
();
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
())
&&
beta1_pow
.
place
()
==
platform
::
CPUPlace
()
&&
beta2_pow
.
place
()
==
platform
::
CPUPlace
())
{
SparseLambMomentREGUpdateFunctor
<
T
>
moment_update_functor
(
static_cast
<
T
>
(
weight_decay
),
static_cast
<
T
>
(
beta1
),
static_cast
<
T
>
(
beta2
),
static_cast
<
T
>
(
epsilon
),
*
beta1_pow
.
template
data
<
T
>(),
*
beta2_pow
.
template
data
<
T
>(),
mom1
.
template
data
<
T
>(),
mom1_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
mom2
.
template
data
<
T
>(),
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
grad_data
,
param
.
template
data
<
T
>(),
trust_ratio_div
.
template
data
<
T
>(),
rows
,
row_numel
,
grad_merge
.
rows
().
size
(),
skip_update_flag
);
for_range
(
moment_update_functor
);
beta1_pow_out
.
template
mutable_data
<
T
>(
platform
::
CPUPlace
())[
0
]
=
static_cast
<
T
>
(
beta1
)
*
beta1_pow
.
template
data
<
T
>()[
0
];
beta2_pow_out
.
template
mutable_data
<
T
>(
platform
::
CPUPlace
())[
0
]
=
static_cast
<
T
>
(
beta2
)
*
beta2_pow
.
template
data
<
T
>()[
0
];
}
else
{
beta1_pow_ptr
=
beta1_pow
.
template
data
<
MT
>();
beta2_pow_ptr
=
beta2_pow
.
template
data
<
MT
>();
beta1_pow_out_ptr
=
beta1_pow_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
());
beta2_pow_out_ptr
=
beta2_pow_out
.
template
mutable_data
<
MT
>(
ctx
.
GetPlace
());
should_update_beta_pow_later
=
true
;
SparseLambMomentMENUpdateFunctor
<
T
>
moment_update_functor
(
static_cast
<
T
>
(
weight_decay
),
static_cast
<
T
>
(
beta1
),
static_cast
<
T
>
(
beta2
),
static_cast
<
T
>
(
epsilon
),
reinterpret_cast
<
const
T
*>
(
beta1_pow_ptr
),
reinterpret_cast
<
const
T
*>
(
beta2_pow_ptr
),
mom1
.
template
data
<
T
>(),
mom1_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
mom2
.
template
data
<
T
>(),
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
grad_data
,
param
.
template
data
<
T
>(),
trust_ratio_div
.
template
data
<
T
>(),
rows
,
row_numel
,
grad_merge
.
rows
().
size
(),
skip_update_flag
);
for_range
(
moment_update_functor
);
}
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Variable type not supported by lamb_op. Expect LoDTensor or "
"SelectedRows, but got %s"
,
framework
::
ToTypeName
(
grad_var
->
Type
())));
}
// Update parameter
auto
p_norm_t
=
ctx
.
AllocateTmpTensor
<
MT
,
DeviceContext
>
({
1
},
dev_ctx
);
auto
*
p_norm_ptr
=
p_norm_t
.
template
data
<
MT
>();
auto
trust_ratio_div_norm_t
=
ctx
.
AllocateTmpTensor
<
MT
,
DeviceContext
>
({
1
},
dev_ctx
);
auto
*
trust_ratio_div_norm_ptr
=
trust_ratio_div_norm_t
.
template
data
<
MT
>();
// TODO(zengjinle): remove the following Eigen operations when
// *skip_update == true.
memory
::
Buffer
buffer
(
dev_ctx
.
GetPlace
());
phi
::
funcs
::
SquaredL2Norm
(
dev_ctx
,
reinterpret_cast
<
const
MT
*>
(
IsMultiPrecision
?
master_param_ptr
:
param_ptr
),
p_norm_ptr
,
numel
,
&
buffer
);
phi
::
funcs
::
SquaredL2Norm
(
dev_ctx
,
trust_ratio_div_ptr
,
trust_ratio_div_norm_ptr
,
numel
,
&
buffer
);
if
(
VLOG_IS_ON
(
1
))
{
const
auto
&
name
=
ctx
.
GetOp
().
Input
(
"Param"
);
auto
pn
=
ToVector
(
p_norm_ptr
,
1
,
dev_ctx
.
GetPlace
());
auto
tn
=
ToVector
(
trust_ratio_div_norm_ptr
,
1
,
dev_ctx
.
GetPlace
());
auto
dtype
=
framework
::
DataTypeToString
(
framework
::
DataTypeTrait
<
T
>::
DataType
());
VLOG
(
1
)
<<
"Param "
<<
dtype
<<
" "
<<
name
<<
" pn = "
<<
pn
[
0
]
<<
" , tn = "
<<
tn
[
0
];
}
#define CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(__should_update_beta_pow) \
do { \
LambParamUpateFunctor<T, MT, IsMultiPrecision, __should_update_beta_pow> \
param_update_functor(lr.template data<MT>(), \
static_cast<const T*>(param_ptr), \
static_cast<const MT*>(master_param_ptr), \
p_norm_ptr, \
trust_ratio_div_ptr, \
trust_ratio_div_norm_ptr, \
static_cast<T*>(param_out_ptr), \
static_cast<MT*>(master_param_out_ptr), \
skip_update_flag); \
if (__should_update_beta_pow) { \
param_update_functor.SetBetaPows(beta1_pow_ptr, \
beta2_pow_ptr, \
beta1_pow_out_ptr, \
beta2_pow_out_ptr, \
beta1, \
beta2); \
} \
for_range(param_update_functor); \
} while (0)
if
(
should_update_beta_pow_later
)
{
CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC
(
true
);
}
else
{
CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC
(
false
);
}
#undef CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC
}
};
}
// namespace operators
}
// namespace paddle
}
// namespace phi
paddle/
fluid/operator
s/tensor_to_string.h
→
paddle/
phi/kernels/func
s/tensor_to_string.h
浏览文件 @
4a7aa7c3
...
...
@@ -16,13 +16,14 @@
#include <sstream>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/utils/string/string_helper.h"
namespace
p
addle
{
namespace
operator
s
{
namespace
p
hi
{
namespace
func
s
{
template
<
typename
T
>
static
const
std
::
vector
<
T
>
&
ToVector
(
const
std
::
vector
<
T
>
&
vec
)
{
...
...
@@ -30,17 +31,15 @@ static const std::vector<T> &ToVector(const std::vector<T> &vec) {
}
template
<
typename
T
>
static
std
::
vector
<
T
>
ToVector
(
const
T
*
x
,
size_t
n
,
const
platform
::
Place
&
place
)
{
static
std
::
vector
<
T
>
ToVector
(
const
T
*
x
,
size_t
n
,
const
phi
::
Place
&
place
)
{
#ifdef __NVCC__
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
p
addle
::
p
latform
::
is_gpu_place
(
place
))
{
using
CopyT
=
typename
std
::
conditional
<
std
::
is_same
<
T
,
bool
>::
value
,
uint8_t
,
T
>::
type
;
std
::
vector
<
CopyT
>
cpu_x
(
n
);
auto
*
dev_ctx
=
static_cast
<
phi
::
GPUContext
*>
(
p
latform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
memory
::
Copy
(
platform
::
CPUPlace
(),
p
hi
::
DeviceContextPool
::
Instance
().
Get
(
place
));
paddle
::
memory
::
Copy
(
phi
::
CPUPlace
(),
cpu_x
.
data
(),
place
,
x
,
...
...
@@ -54,7 +53,7 @@ static std::vector<T> ToVector(const T *x,
}
template
<
typename
T
>
static
std
::
vector
<
T
>
ToVector
(
const
framework
::
Tensor
&
src
)
{
static
std
::
vector
<
T
>
ToVector
(
const
Dense
Tensor
&
src
)
{
if
(
!
src
.
IsInitialized
())
{
return
{};
}
...
...
@@ -64,8 +63,8 @@ static std::vector<T> ToVector(const framework::Tensor &src) {
template
<
typename
...
Args
>
static
std
::
string
FlattenToString
(
Args
&&
...
args
)
{
const
auto
&
vec
=
ToVector
(
std
::
forward
<
Args
>
(
args
)...);
return
"["
+
string
::
join_strings
(
vec
,
','
)
+
"]"
;
return
"["
+
paddle
::
string
::
join_strings
(
vec
,
','
)
+
"]"
;
}
}
// namespace
operator
s
}
// namespace p
addle
}
// namespace
func
s
}
// namespace p
hi
paddle/phi/kernels/gpu/lamb_kernel.cu
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/lamb_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/lamb_kernel_impl.h"
// Registers phi::LambKernel (dense gradient) under the name "lamb" for the GPU
// backend, for float16, float and double element types.
PD_REGISTER_KERNEL(lamb,
                   GPU,
                   ALL_LAYOUT,
                   phi::LambKernel,
                   phi::dtype::float16,
                   float,
                   double) {
  // Inputs 5 and 6 are presumably beta1_pow and beta2_pow (their zero-based
  // positions among LambKernel's tensor inputs) - TODO confirm the indexing
  // convention of InputAt. The kernel body explicitly handles the case where
  // both tensors live on CPUPlace while running on GPU, so they are not
  // restricted to the GPU backend here.
  kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND);
  kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND);
}
paddle/phi/kernels/impl/lamb_kernel_impl.h
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
namespace
phi
{
// Forward declaration of the dense-gradient LAMB implementation. The
// definition follows later in this header; declaring it here lets LambKernel
// dispatch to it before the definition is seen.
//
// T is the parameter/gradient element type, MT the type used for the moments,
// norms and learning rate, and IsMultiPrecision selects whether the master
// parameter tensors are used as the source/destination of the update.
template <typename T, typename MT, typename Context, bool IsMultiPrecision>
void ComputeImpl(const Context& dev_ctx,
                 const DenseTensor& param,
                 const DenseTensor& grad,
                 const DenseTensor& lr,
                 const DenseTensor& mom1,
                 const DenseTensor& mom2,
                 const DenseTensor& beta1_pow,
                 const DenseTensor& beta2_pow,
                 const paddle::optional<DenseTensor>& master_param_opt,
                 const paddle::optional<DenseTensor>& skip_update_opt,
                 float weight_decay_f,
                 float beta1_f,
                 float beta2_f,
                 float epsilon_f,
                 bool multi_precision,
                 DenseTensor* param_out,
                 DenseTensor* mom1_out,
                 DenseTensor* mom2_out,
                 DenseTensor* beta1_pow_out,
                 DenseTensor* beta2_pow_out,
                 DenseTensor* master_param_out);
// Entry point of the dense-gradient LAMB kernel.
//
// Dispatches to ComputeImpl with the compute type chosen from
// `multi_precision`:
//   - multi_precision == false: compute type is T itself.
//   - multi_precision == true : compute type is MPTypeTrait<T>::Type and the
//     master parameter tensors are used.
// All tensor and attribute arguments are forwarded unchanged.
template <typename T, typename Context>
void LambKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const DenseTensor& grad,
                const DenseTensor& learning_rate,
                const DenseTensor& moment1,
                const DenseTensor& moment2,
                const DenseTensor& beta1_pow,
                const DenseTensor& beta2_pow,
                const paddle::optional<DenseTensor>& master_param,
                const paddle::optional<DenseTensor>& skip_update,
                float weight_decay,
                float beta1,
                float beta2,
                float epsilon,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* moment1_out,
                DenseTensor* moment2_out,
                DenseTensor* beta1_pow_out,
                DenseTensor* beta2_pow_out,
                DenseTensor* master_param_outs) {
  // Single-precision path: T doubles as the compute type.
  if (!multi_precision) {
    ComputeImpl<T, T, Context, false>(dev_ctx,
                                      param,
                                      grad,
                                      learning_rate,
                                      moment1,
                                      moment2,
                                      beta1_pow,
                                      beta2_pow,
                                      master_param,
                                      skip_update,
                                      weight_decay,
                                      beta1,
                                      beta2,
                                      epsilon,
                                      multi_precision,
                                      param_out,
                                      moment1_out,
                                      moment2_out,
                                      beta1_pow_out,
                                      beta2_pow_out,
                                      master_param_outs);
    return;
  }

  // Multi-precision path: compute in the type mapped from T by MPTypeTrait.
  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  ComputeImpl<T, MT, Context, true>(dev_ctx,
                                    param,
                                    grad,
                                    learning_rate,
                                    moment1,
                                    moment2,
                                    beta1_pow,
                                    beta2_pow,
                                    master_param,
                                    skip_update,
                                    weight_decay,
                                    beta1,
                                    beta2,
                                    epsilon,
                                    multi_precision,
                                    param_out,
                                    moment1_out,
                                    moment2_out,
                                    beta1_pow_out,
                                    beta2_pow_out,
                                    master_param_outs);
}
// Dense-gradient LAMB update.
//
// Steps performed, in order:
//   1. Return immediately (outputs untouched) when the optional skip_update
//      flag lives on the CPU and is true.
//   2. Run the moment-update functor over every element; it also fills the
//      temporary `trust_ratio_div` tensor.
//   3. Compute the squared L2 norms of the parameter and of trust_ratio_div.
//   4. Run the parameter-update functor, which, when the beta powers were not
//      already advanced on the host in step 2, also updates them.
//
// T is the parameter/gradient type, MT the compute type. When
// IsMultiPrecision is false, T and MT must be the same type.
template <typename T, typename MT, typename Context, bool IsMultiPrecision>
void ComputeImpl(const Context& dev_ctx,
                 const DenseTensor& param,
                 const DenseTensor& grad,
                 const DenseTensor& lr,
                 const DenseTensor& mom1,
                 const DenseTensor& mom2,
                 const DenseTensor& beta1_pow,
                 const DenseTensor& beta2_pow,
                 const paddle::optional<DenseTensor>& master_param_opt,
                 const paddle::optional<DenseTensor>& skip_update_opt,
                 float weight_decay_f,
                 float beta1_f,
                 float beta2_f,
                 float epsilon_f,
                 bool multi_precision,
                 DenseTensor* param_out,
                 DenseTensor* mom1_out,
                 DenseTensor* mom2_out,
                 DenseTensor* beta1_pow_out,
                 DenseTensor* beta2_pow_out,
                 DenseTensor* master_param_out) {
  if (!IsMultiPrecision) {
    constexpr auto kIsSameType = std::is_same<T, MT>::value;
    PADDLE_ENFORCE_EQ(
        kIsSameType,
        true,
        phi::errors::InvalidArgument(
            "When multi_precision=False, T and MT must be the same type."));
  }

  // The master parameter is only consulted in multi-precision mode.
  const auto* master_param =
      IsMultiPrecision ? master_param_opt.get_ptr() : nullptr;

  // Resolve the optional skip-update flag to a raw pointer (or nullptr).
  const auto* skip_update = skip_update_opt.get_ptr();
  const bool* skip_update_flag = nullptr;
  if (skip_update && skip_update->IsInitialized()) {
    skip_update_flag = skip_update->data<bool>();
  }
  // The flag can only be dereferenced here when it is stored on the CPU;
  // otherwise the pointer is handed to the functors below.
  if (skip_update_flag &&
      paddle::platform::is_cpu_place(skip_update->place()) &&
      (*skip_update_flag)) {
    return;
  }

  // Hyper-parameters converted to the compute type.
  auto weight_decay = static_cast<MT>(weight_decay_f);
  auto beta1 = static_cast<MT>(beta1_f);
  auto beta2 = static_cast<MT>(beta2_f);
  auto epsilon = static_cast<MT>(epsilon_f);

  auto numel = param.numel();
  phi::funcs::ForRange<Context> for_range(dev_ctx, numel);

  // Temporary tensor with the parameter's shape, written by the moment
  // functor and read by the norm computation and the parameter functor.
  DenseTensor trust_ratio_div;
  trust_ratio_div.Resize(param.dims());
  auto* trust_ratio_div_ptr = dev_ctx.template Alloc<MT>(&trust_ratio_div);

  const void* param_ptr = param.data();
  const void* master_param_ptr = master_param ? master_param->data() : nullptr;
  void* param_out_ptr = dev_ctx.template Alloc<T>(param_out);
  void* master_param_out_ptr =
      master_param_out ? dev_ctx.template Alloc<MT>(master_param_out)
                       : nullptr;

  // Parameter values, viewed in the compute type, that drive the update.
  const MT* update_src =
      static_cast<const MT*>(IsMultiPrecision ? master_param_ptr : param_ptr);

  // Update moments
  bool should_update_beta_pow_later = false;
  const MT* beta1_pow_ptr = nullptr;
  const MT* beta2_pow_ptr = nullptr;
  MT* beta1_pow_out_ptr = nullptr;
  MT* beta2_pow_out_ptr = nullptr;

  VLOG(10) << "Beta1Pow place: " << beta1_pow.place()
           << " , Beta2Pow place: " << beta2_pow.place();

  // This part differs from the SelectedRows variant of the kernel.
  const bool beta_pows_on_host =
      paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
      beta1_pow.place() == phi::CPUPlace() &&
      beta2_pow.place() == phi::CPUPlace();

  if (beta_pows_on_host) {
    // Beta powers are host scalars: pass them by value and advance them on
    // the host right after the moment update.
    LambMomentREGUpdateFunctor<T, IsMultiPrecision> moment_update_functor(
        weight_decay,
        beta1,
        beta2,
        epsilon,
        *beta1_pow.template data<MT>(),
        *beta2_pow.template data<MT>(),
        mom1.template data<MT>(),
        dev_ctx.template Alloc<MT>(mom1_out),
        mom2.template data<MT>(),
        dev_ctx.template Alloc<MT>(mom2_out),
        grad.template data<T>(),
        update_src,
        trust_ratio_div_ptr,
        skip_update_flag);
    for_range(moment_update_functor);

    MT* beta1_pow_out_data = dev_ctx.template HostAlloc<MT>(beta1_pow_out);
    beta1_pow_out_data[0] = beta1 * beta1_pow.template data<MT>()[0];
    MT* beta2_pow_out_data = dev_ctx.template HostAlloc<MT>(beta2_pow_out);
    beta2_pow_out_data[0] = beta2 * beta2_pow.template data<MT>()[0];
  } else {
    // Beta powers are read through pointers; their outputs are written later
    // by the parameter-update functor.
    beta1_pow_ptr = beta1_pow.template data<MT>();
    beta2_pow_ptr = beta2_pow.template data<MT>();
    beta1_pow_out_ptr = dev_ctx.template Alloc<MT>(beta1_pow_out);
    beta2_pow_out_ptr = dev_ctx.template Alloc<MT>(beta2_pow_out);
    should_update_beta_pow_later = true;

    LambMomentMENUpdateFunctor<T, IsMultiPrecision> moment_update_functor(
        weight_decay,
        beta1,
        beta2,
        epsilon,
        beta1_pow_ptr,
        beta2_pow_ptr,
        mom1.template data<MT>(),
        dev_ctx.template Alloc<MT>(mom1_out),
        mom2.template data<MT>(),
        dev_ctx.template Alloc<MT>(mom2_out),
        grad.template data<T>(),
        update_src,
        trust_ratio_div_ptr,
        skip_update_flag);
    for_range(moment_update_functor);
  }

  // Update parameter.
  // NOTE: everything from here to the end of the function is duplicated in
  // paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h; keep the two
  // copies in sync when modifying either.
  DenseTensor p_norm_t;
  p_norm_t.Resize(phi::make_ddim({1}));
  auto* p_norm_ptr = dev_ctx.template Alloc<MT>(&p_norm_t);

  DenseTensor trust_ratio_div_norm_t;
  trust_ratio_div_norm_t.Resize(phi::make_ddim({1}));
  auto* trust_ratio_div_norm_ptr =
      dev_ctx.template Alloc<MT>(&trust_ratio_div_norm_t);

  // TODO(zengjinle): remove the following Eigen operations when
  // *skip_update == true.
  paddle::memory::Buffer buffer(dev_ctx.GetPlace());
  phi::funcs::SquaredL2Norm(dev_ctx, update_src, p_norm_ptr, numel, &buffer);
  phi::funcs::SquaredL2Norm(dev_ctx,
                            trust_ratio_div_ptr,
                            trust_ratio_div_norm_ptr,
                            numel,
                            &buffer);

  if (VLOG_IS_ON(1)) {
    const auto& name = "Param";
    auto pn = phi::funcs::ToVector(p_norm_ptr, 1, dev_ctx.GetPlace());
    auto tn =
        phi::funcs::ToVector(trust_ratio_div_norm_ptr, 1, dev_ctx.GetPlace());
    auto dtype = paddle::framework::DataTypeToString(
        paddle::framework::DataTypeTrait<T>::DataType());
    VLOG(1) << "Param " << dtype << " " << name << " pn = " << pn[0]
            << " , tn = " << tn[0];
  }

// The functor's last template argument must be a compile-time constant, so
// the runtime flag is turned into two separate instantiations via this macro.
#define CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(__should_update_beta_pow)         \
  do {                                                                       \
    LambParamUpateFunctor<T, MT, IsMultiPrecision, __should_update_beta_pow> \
        param_update_functor(lr.template data<MT>(),                         \
                             static_cast<const T*>(param_ptr),               \
                             static_cast<const MT*>(master_param_ptr),       \
                             p_norm_ptr,                                     \
                             trust_ratio_div_ptr,                            \
                             trust_ratio_div_norm_ptr,                       \
                             static_cast<T*>(param_out_ptr),                 \
                             static_cast<MT*>(master_param_out_ptr),         \
                             skip_update_flag);                              \
    if (__should_update_beta_pow) {                                          \
      param_update_functor.SetBetaPows(beta1_pow_ptr,                        \
                                       beta2_pow_ptr,                        \
                                       beta1_pow_out_ptr,                    \
                                       beta2_pow_out_ptr,                    \
                                       beta1,                                \
                                       beta2);                               \
    }                                                                        \
    for_range(param_update_functor);                                         \
  } while (0)

  if (should_update_beta_pow_later) {
    CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(true);
  } else {
    CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(false);
  }

#undef CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC
}
}
// namespace phi
paddle/phi/kernels/lamb_kernel.h
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
// Declaration of the dense-gradient LAMB optimizer kernel.
//
// Inputs: the parameter, its gradient, the learning rate, the two moment
// tensors and the beta1/beta2 power tensors, plus two optional tensors:
//   - master_param: used only when multi_precision is true.
//   - skip_update : a bool flag; the implementation returns early without
//     writing outputs when the flag is on the CPU and true.
// Attributes: weight_decay, beta1, beta2, epsilon and multi_precision.
// Outputs: the *_out / master_param_outs tensors, one per updated input.
template <typename T, typename Context>
void LambKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const DenseTensor& grad,
                const DenseTensor& learning_rate,
                const DenseTensor& moment1,
                const DenseTensor& moment2,
                const DenseTensor& beta1_pow,
                const DenseTensor& beta2_pow,
                const paddle::optional<DenseTensor>& master_param,
                const paddle::optional<DenseTensor>& skip_update,
                float weight_decay,
                float beta1,
                float beta2,
                float epsilon,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* moment1_out,
                DenseTensor* moment2_out,
                DenseTensor* beta1_pow_out,
                DenseTensor* beta2_pow_out,
                DenseTensor* master_param_outs);
}
// namespace phi
paddle/phi/kernels/selected_rows/cpu/lamb_kernel.cc
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/lamb_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h"
// Registers phi::sr::LambKernel (SelectedRows gradient) under the name
// "lamb_sr" for the CPU backend, for float and double element types. No
// per-kernel argument customization is needed, hence the empty body.
PD_REGISTER_KERNEL(
    lamb_sr, CPU, ALL_LAYOUT, phi::sr::LambKernel, float, double) {}
paddle/phi/kernels/selected_rows/gpu/lamb_kernel.cu
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/lamb_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h"
// Registers phi::sr::LambKernel (SelectedRows gradient) under the name
// "lamb_sr" for the GPU backend, for float16, float and double element types.
PD_REGISTER_KERNEL(lamb_sr,
                   GPU,
                   ALL_LAYOUT,
                   phi::sr::LambKernel,
                   phi::dtype::float16,
                   float,
                   double) {
  // Inputs 5 and 6 are presumably beta1_pow and beta2_pow (their zero-based
  // positions among the kernel's tensor inputs) - TODO confirm the indexing
  // convention of InputAt. The kernel body explicitly handles the case where
  // both tensors live on CPUPlace while running on GPU, so they are not
  // restricted to the GPU backend here.
  kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND);
  kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND);
}
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
namespace
phi
{
namespace
sr
{
// Forward declaration of the SelectedRows-gradient LAMB implementation. The
// definition follows later in this header; declaring it here lets
// sr::LambKernel dispatch to it before the definition is seen.
//
// T is the parameter/gradient element type and MT the compute type. The
// definition rejects IsMultiPrecision == true with an Unimplemented error.
template <typename T, typename MT, typename Context, bool IsMultiPrecision>
void ComputeRowImpl(const Context& dev_ctx,
                    const DenseTensor& param,
                    const SelectedRows& grad,
                    const DenseTensor& lr,
                    const DenseTensor& mom1,
                    const DenseTensor& mom2,
                    const DenseTensor& beta1_pow,
                    const DenseTensor& beta2_pow,
                    const paddle::optional<DenseTensor>& master_param_opt,
                    const paddle::optional<DenseTensor>& skip_update_opt,
                    float weight_decay_f,
                    float beta1_f,
                    float beta2_f,
                    float epsilon_f,
                    bool multi_precision,
                    DenseTensor* param_out,
                    DenseTensor* mom1_out,
                    DenseTensor* mom2_out,
                    DenseTensor* beta1_pow_out,
                    DenseTensor* beta2_pow_out,
                    DenseTensor* master_param_out);
// Entry point of the SelectedRows-gradient LAMB kernel.
//
// Dispatches to ComputeRowImpl with the compute type chosen from
// `multi_precision`:
//   - multi_precision == false: compute type is T itself.
//   - multi_precision == true : compute type is MPTypeTrait<T>::Type
//     (ComputeRowImpl currently raises Unimplemented for this mode).
// All tensor and attribute arguments are forwarded unchanged.
template <typename T, typename Context>
void LambKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const SelectedRows& grad,
                const DenseTensor& learning_rate,
                const DenseTensor& moment1,
                const DenseTensor& moment2,
                const DenseTensor& beta1_pow,
                const DenseTensor& beta2_pow,
                const paddle::optional<DenseTensor>& master_param,
                const paddle::optional<DenseTensor>& skip_update,
                float weight_decay,
                float beta1,
                float beta2,
                float epsilon,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* moment1_out,
                DenseTensor* moment2_out,
                DenseTensor* beta1_pow_out,
                DenseTensor* beta2_pow_out,
                DenseTensor* master_param_outs) {
  // Single-precision path: T doubles as the compute type.
  if (!multi_precision) {
    ComputeRowImpl<T, T, Context, false>(dev_ctx,
                                         param,
                                         grad,
                                         learning_rate,
                                         moment1,
                                         moment2,
                                         beta1_pow,
                                         beta2_pow,
                                         master_param,
                                         skip_update,
                                         weight_decay,
                                         beta1,
                                         beta2,
                                         epsilon,
                                         multi_precision,
                                         param_out,
                                         moment1_out,
                                         moment2_out,
                                         beta1_pow_out,
                                         beta2_pow_out,
                                         master_param_outs);
    return;
  }

  // Multi-precision path: compute in the type mapped from T by MPTypeTrait.
  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  ComputeRowImpl<T, MT, Context, true>(dev_ctx,
                                       param,
                                       grad,
                                       learning_rate,
                                       moment1,
                                       moment2,
                                       beta1_pow,
                                       beta2_pow,
                                       master_param,
                                       skip_update,
                                       weight_decay,
                                       beta1,
                                       beta2,
                                       epsilon,
                                       multi_precision,
                                       param_out,
                                       moment1_out,
                                       moment2_out,
                                       beta1_pow_out,
                                       beta2_pow_out,
                                       master_param_outs);
}
// SelectedRows-gradient LAMB update.
//
// Steps performed, in order:
//   1. Return immediately (outputs untouched) when the optional skip_update
//      flag lives on the CPU and is true.
//   2. Reject multi-precision mode (Unimplemented) and return early when the
//      gradient has no rows.
//   3. Use the gradient as-is when its row indices are strictly increasing;
//      otherwise merge duplicated rows with MergeAdd (which also sorts them).
//   4. Run the sparse moment-update functor, then compute the squared L2
//      norms and run the parameter-update functor exactly as the dense
//      kernel does.
//
// T is the parameter/gradient type, MT the compute type; they must be equal.
template <typename T, typename MT, typename Context, bool IsMultiPrecision>
void ComputeRowImpl(const Context& dev_ctx,
                    const DenseTensor& param,
                    const SelectedRows& grad,
                    const DenseTensor& lr,
                    const DenseTensor& mom1,
                    const DenseTensor& mom2,
                    const DenseTensor& beta1_pow,
                    const DenseTensor& beta2_pow,
                    const paddle::optional<DenseTensor>& master_param_opt,
                    const paddle::optional<DenseTensor>& skip_update_opt,
                    float weight_decay_f,
                    float beta1_f,
                    float beta2_f,
                    float epsilon_f,
                    bool multi_precision,
                    DenseTensor* param_out,
                    DenseTensor* mom1_out,
                    DenseTensor* mom2_out,
                    DenseTensor* beta1_pow_out,
                    DenseTensor* beta2_pow_out,
                    DenseTensor* master_param_out) {
  if (!IsMultiPrecision) {
    constexpr auto kIsSameType = std::is_same<T, MT>::value;
    PADDLE_ENFORCE_EQ(
        kIsSameType,
        true,
        phi::errors::InvalidArgument(
            "When multi_precision=False, T and MT must be the same type."));
  }

  // The master parameter is only consulted in multi-precision mode.
  const auto* master_param =
      IsMultiPrecision ? master_param_opt.get_ptr() : nullptr;

  // Resolve the optional skip-update flag to a raw pointer (or nullptr).
  const auto* skip_update = skip_update_opt.get_ptr();
  const bool* skip_update_flag = nullptr;
  if (skip_update && skip_update->IsInitialized()) {
    skip_update_flag = skip_update->data<bool>();
  }
  // The flag can only be dereferenced here when it is stored on the CPU;
  // otherwise the pointer is handed to the functors below.
  if (skip_update_flag &&
      paddle::platform::is_cpu_place(skip_update->place()) &&
      (*skip_update_flag)) {
    return;
  }

  // Hyper-parameters converted to the compute type.
  auto weight_decay = static_cast<MT>(weight_decay_f);
  auto beta1 = static_cast<MT>(beta1_f);
  auto beta2 = static_cast<MT>(beta2_f);
  auto epsilon = static_cast<MT>(epsilon_f);

  auto numel = param.numel();
  phi::funcs::ForRange<Context> for_range(dev_ctx, numel);

  // Temporary tensor with the parameter's shape, written by the moment
  // functor and read by the norm computation and the parameter functor.
  DenseTensor trust_ratio_div;
  trust_ratio_div.Resize(param.dims());
  auto* trust_ratio_div_ptr = dev_ctx.template Alloc<MT>(&trust_ratio_div);

  const void* param_ptr = param.data();
  const void* master_param_ptr = master_param ? master_param->data() : nullptr;
  void* param_out_ptr = dev_ctx.template Alloc<T>(param_out);
  void* master_param_out_ptr =
      master_param_out ? dev_ctx.template Alloc<MT>(master_param_out)
                       : nullptr;

  // Update moments
  bool should_update_beta_pow_later = false;
  const MT* beta1_pow_ptr = nullptr;
  const MT* beta2_pow_ptr = nullptr;
  MT* beta1_pow_out_ptr = nullptr;
  MT* beta2_pow_out_ptr = nullptr;

  VLOG(10) << "Beta1Pow place: " << beta1_pow.place()
           << " , Beta2Pow place: " << beta2_pow.place();

  // This part differs from the dense variant of the kernel.
  PADDLE_ENFORCE_EQ(IsMultiPrecision,
                    false,
                    phi::errors::Unimplemented(
                        "SelectedRows gradient is not supported when "
                        "multi_precision=True."));
  constexpr bool kIsSameType = std::is_same<T, MT>::value;
  PADDLE_ENFORCE_EQ(kIsSameType,
                    true,
                    phi::errors::Unimplemented(
                        "SelectedRows gradient is not supported when "
                        "multi_precision=True."));

  if (grad.rows().size() == 0) {
    VLOG(3) << "grad row size is 0!!";
    return;
  }

  // Row indices are usable directly only if strictly increasing, i.e. sorted
  // and free of duplicates.
  std::vector<int64_t> cpu_rows(grad.rows().begin(), grad.rows().end());
  bool is_strict_sorted = true;
  for (size_t i = 1; is_strict_sorted && i < cpu_rows.size(); ++i) {
    is_strict_sorted = cpu_rows[i - 1] < cpu_rows[i];
  }

  phi::SelectedRows tmp_grad_merge;
  const phi::SelectedRows* grad_merge_ptr = &grad;
  if (!is_strict_sorted) {
    // merge duplicated rows if any.
    // The rows of grad_merge have been sorted inside MergeAdd functor
    paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
    merge_func(dev_ctx, grad, &tmp_grad_merge, true);
    grad_merge_ptr = &tmp_grad_merge;
  }

  auto& grad_merge = *grad_merge_ptr;
  auto& grad_tensor = grad_merge.value();
  const T* grad_data = grad_tensor.template data<T>();
  auto* grad_merge_rows = &grad_merge.rows();
  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
  const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
  auto row_numel = grad_tensor.numel() / grad_merge.rows().size();

  const bool beta_pows_on_host =
      paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
      beta1_pow.place() == phi::CPUPlace() &&
      beta2_pow.place() == phi::CPUPlace();

  if (beta_pows_on_host) {
    // Beta powers are host scalars: pass them by value and advance them on
    // the host right after the moment update.
    SparseLambMomentREGUpdateFunctor<T> moment_update_functor(
        static_cast<T>(weight_decay),
        static_cast<T>(beta1),
        static_cast<T>(beta2),
        static_cast<T>(epsilon),
        *beta1_pow.template data<T>(),
        *beta2_pow.template data<T>(),
        mom1.template data<T>(),
        dev_ctx.template Alloc<T>(mom1_out),
        mom2.template data<T>(),
        dev_ctx.template Alloc<T>(mom2_out),
        grad_data,
        param.template data<T>(),
        trust_ratio_div.template data<T>(),
        rows,
        row_numel,
        grad_merge.rows().size(),
        skip_update_flag);
    for_range(moment_update_functor);

    T* beta1_pow_out_data = dev_ctx.template HostAlloc<T>(beta1_pow_out);
    beta1_pow_out_data[0] =
        static_cast<T>(beta1) * beta1_pow.template data<T>()[0];
    T* beta2_pow_out_data = dev_ctx.template HostAlloc<T>(beta2_pow_out);
    beta2_pow_out_data[0] =
        static_cast<T>(beta2) * beta2_pow.template data<T>()[0];
  } else {
    // Beta powers are read through pointers; their outputs are written later
    // by the parameter-update functor.
    beta1_pow_ptr = beta1_pow.template data<MT>();
    beta2_pow_ptr = beta2_pow.template data<MT>();
    beta1_pow_out_ptr = dev_ctx.template Alloc<MT>(beta1_pow_out);
    beta2_pow_out_ptr = dev_ctx.template Alloc<MT>(beta2_pow_out);
    should_update_beta_pow_later = true;

    SparseLambMomentMENUpdateFunctor<T> moment_update_functor(
        static_cast<T>(weight_decay),
        static_cast<T>(beta1),
        static_cast<T>(beta2),
        static_cast<T>(epsilon),
        reinterpret_cast<const T*>(beta1_pow_ptr),
        reinterpret_cast<const T*>(beta2_pow_ptr),
        mom1.template data<T>(),
        dev_ctx.template Alloc<T>(mom1_out),
        mom2.template data<T>(),
        dev_ctx.template Alloc<T>(mom2_out),
        grad_data,
        param.template data<T>(),
        trust_ratio_div.template data<T>(),
        rows,
        row_numel,
        grad_merge.rows().size(),
        skip_update_flag);
    for_range(moment_update_functor);
  }

  // Update parameter.
  // NOTE: everything from here to the end of the function is duplicated in
  // paddle/phi/kernels/impl/lamb_kernel_impl.h; keep the two copies in sync
  // when modifying either.
  DenseTensor p_norm_t;
  p_norm_t.Resize(phi::make_ddim({1}));
  auto* p_norm_ptr = dev_ctx.template Alloc<MT>(&p_norm_t);

  DenseTensor trust_ratio_div_norm_t;
  trust_ratio_div_norm_t.Resize(phi::make_ddim({1}));
  auto* trust_ratio_div_norm_ptr =
      dev_ctx.template Alloc<MT>(&trust_ratio_div_norm_t);

  // TODO(zengjinle): remove the following Eigen operations when
  // *skip_update == true.
  paddle::memory::Buffer buffer(dev_ctx.GetPlace());
  const MT* norm_src =
      static_cast<const MT*>(IsMultiPrecision ? master_param_ptr : param_ptr);
  phi::funcs::SquaredL2Norm(dev_ctx, norm_src, p_norm_ptr, numel, &buffer);
  phi::funcs::SquaredL2Norm(dev_ctx,
                            trust_ratio_div_ptr,
                            trust_ratio_div_norm_ptr,
                            numel,
                            &buffer);

  if (VLOG_IS_ON(1)) {
    const auto& name = "Param";
    auto pn = phi::funcs::ToVector(p_norm_ptr, 1, dev_ctx.GetPlace());
    auto tn =
        phi::funcs::ToVector(trust_ratio_div_norm_ptr, 1, dev_ctx.GetPlace());
    auto dtype = paddle::framework::DataTypeToString(
        paddle::framework::DataTypeTrait<T>::DataType());
    VLOG(1) << "Param " << dtype << " " << name << " pn = " << pn[0]
            << " , tn = " << tn[0];
  }

// The functor's last template argument must be a compile-time constant, so
// the runtime flag is turned into two separate instantiations via this macro.
#define CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(__should_update_beta_pow)         \
  do {                                                                       \
    LambParamUpateFunctor<T, MT, IsMultiPrecision, __should_update_beta_pow> \
        param_update_functor(lr.template data<MT>(),                         \
                             static_cast<const T*>(param_ptr),               \
                             static_cast<const MT*>(master_param_ptr),       \
                             p_norm_ptr,                                     \
                             trust_ratio_div_ptr,                            \
                             trust_ratio_div_norm_ptr,                       \
                             static_cast<T*>(param_out_ptr),                 \
                             static_cast<MT*>(master_param_out_ptr),         \
                             skip_update_flag);                              \
    if (__should_update_beta_pow) {                                          \
      param_update_functor.SetBetaPows(beta1_pow_ptr,                        \
                                       beta2_pow_ptr,                        \
                                       beta1_pow_out_ptr,                    \
                                       beta2_pow_out_ptr,                    \
                                       beta1,                                \
                                       beta2);                               \
    }                                                                        \
    for_range(param_update_functor);                                         \
  } while (0)

  if (should_update_beta_pow_later) {
    CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(true);
  } else {
    CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(false);
  }

#undef CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC
}
}
// namespace sr
}
// namespace phi
paddle/phi/kernels/selected_rows/lamb_kernel.h
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace
phi
{
namespace
sr
{
// Declaration of the LAMB optimizer kernel for a SelectedRows (sparse)
// gradient.
//
// Inputs: the parameter, its SelectedRows gradient, the learning rate, the
// two moment tensors and the beta1/beta2 power tensors, plus two optional
// tensors:
//   - master_param: used only when multi_precision is true (a mode the
//     implementation currently rejects as Unimplemented).
//   - skip_update : a bool flag; the implementation returns early without
//     writing outputs when the flag is on the CPU and true.
// Attributes: weight_decay, beta1, beta2, epsilon and multi_precision.
// Outputs: the *_out / master_param_outs tensors, one per updated input.
template <typename T, typename Context>
void LambKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const SelectedRows& grad,
                const DenseTensor& learning_rate,
                const DenseTensor& moment1,
                const DenseTensor& moment2,
                const DenseTensor& beta1_pow,
                const DenseTensor& beta2_pow,
                const paddle::optional<DenseTensor>& master_param,
                const paddle::optional<DenseTensor>& skip_update,
                float weight_decay,
                float beta1,
                float beta2,
                float epsilon,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* moment1_out,
                DenseTensor* moment2_out,
                DenseTensor* beta1_pow_out,
                DenseTensor* beta2_pow_out,
                DenseTensor* master_param_outs);
}
// namespace sr
}
// namespace phi
paddle/phi/ops/compat/lamb_sig.cc
0 → 100644
浏览文件 @
4a7aa7c3
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "paddle/phi/core/compat/op_utils.h"
#include "paddle/utils/small_vector.h"
namespace phi {

/**
 * @brief Builds the kernel signature for the `lamb` operator.
 *
 * The kernel name is chosen from the kind of the "Grad" input:
 *   - SelectedRows -> "lamb_sr"
 *   - DenseTensor  -> "lamb"
 *   - anything else -> an "unregistered" signature with empty argument lists.
 *
 * Both real kernels share the same input, attribute and output name lists.
 *
 * @param ctx  Argument-mapping context used to query the type of "Grad".
 * @return The KernelSignature describing which kernel to call and how the
 *         operator's inputs/attributes/outputs map onto its arguments.
 */
KernelSignature LambOpArgumentMapping(const ArgumentMappingContext& ctx) {
  // Query order matters only for short-circuiting: the dense check is made
  // solely when the gradient is not SelectedRows.
  const bool sparse_grad = ctx.IsSelectedRowsInput("Grad");
  if (!sparse_grad && !ctx.IsDenseTensorInput("Grad")) {
    return KernelSignature("unregistered", {}, {}, {});
  }

  const char* kernel_name = sparse_grad ? "lamb_sr" : "lamb";

  paddle::small_vector<const char*> inputs = {"Param",
                                              "Grad",
                                              "LearningRate",
                                              "Moment1",
                                              "Moment2",
                                              "Beta1Pow",
                                              "Beta2Pow",
                                              "MasterParam",
                                              "SkipUpdate"};
  paddle::small_vector<const char*> attrs = {
      "weight_decay", "beta1", "beta2", "epsilon", "multi_precision"};
  paddle::small_vector<const char*> outputs = {"ParamOut",
                                               "Moment1Out",
                                               "Moment2Out",
                                               "Beta1PowOut",
                                               "Beta2PowOut",
                                               "MasterParamOut"};

  return KernelSignature(
      kernel_name, std::move(inputs), std::move(attrs), std::move(outputs));
}

}  // namespace phi
// Register phi::LambOpArgumentMapping as the argument-mapping function for
// the `lamb` operator.
PD_REGISTER_ARG_MAPPING_FN(lamb, phi::LambOpArgumentMapping);
python/paddle/optimizer/lamb.py
浏览文件 @
4a7aa7c3
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -21,6 +21,7 @@ from ..fluid import unique_name
from
..fluid.layer_helper
import
LayerHelper
from
paddle
import
_C_ops
from
paddle.fluid.executor
import
global_scope
import
paddle
__all__
=
[]
...
...
@@ -266,6 +267,13 @@ class Lamb(Optimizer):
master_weight
=
None
found_inf
=
self
.
_get_auxiliary_var
(
'found_inf'
)
if
framework
.
in_dygraph_mode
():
_C_ops
.
final_state_lamb_
(
param_and_grad
[
0
],
param_and_grad
[
1
],
lr
,
moment1
,
moment2
,
beta1_pow_acc
,
beta2_pow_acc
,
master_weight
,
found_inf
,
weight_decay
,
self
.
_beta1
,
self
.
_beta2
,
self
.
_epsilon
,
find_master
)
return
None
if
framework
.
_non_static_mode
():
_C_ops
.
lamb
(
param_and_grad
[
0
],
param_and_grad
[
1
],
lr
,
moment1
,
moment2
,
beta1_pow_acc
,
beta2_pow_acc
,
master_weight
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录