Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
39ef5736
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
39ef5736
编写于
12月 25, 2017
作者:
Y
Yang Yu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use transform to rewrite adam
上级
cbe973f9
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
93 addition
and
39 deletion
+93
-39
paddle/operators/adam_op.h
paddle/operators/adam_op.h
+93
-39
未找到文件。
paddle/operators/adam_op.h
浏览文件 @
39ef5736
...
...
@@ -13,59 +13,113 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include
"paddle/framework/eigen.h"
#include
<math.h> // for sqrt in CPU and CUDA
#include "paddle/framework/op_registry.h"
#include "paddle/operators/detail/safe_ref.h"
#include "paddle/platform/transform.h"
namespace
paddle
{
namespace
operators
{
// Per-element Adam update expressed as a unary functor.
//
// Calling the functor with a reference to one parameter element returns the
// updated value for that element and, as a side effect, stores the refreshed
// first and second moments for the same position into moment1_out_ and
// moment2_out_.
//
// NOTE: the element position is derived from the address of the argument
// relative to param_, so the argument must be a reference into the array that
// starts at param_ (not a copy of an element).
template <typename T>
struct AdamFunctor {
  // Scalar hyper-parameters, stored by value.
  T beta1_;
  T beta2_;
  T epsilon_;

  // Only the first element behind each of these pointers is read per call.
  const T* beta1_pow_;
  const T* beta2_pow_;

  // Moment buffers: the inputs are read and the *_out_ buffers are written at
  // the element position of the current call.
  const T* moment1_;
  T* moment1_out_;
  const T* moment2_;
  T* moment2_out_;

  // Learning rate; only the first element is read per call.
  const T* lr_;
  // Gradient buffer, read at the element position of the current call.
  const T* grad_;
  // Start of the parameter buffer; used as the base for index recovery.
  const T* param_;

  // Stores every argument in the member of the matching name; no pointer is
  // dereferenced here.
  AdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
              const T* beta2_pow, const T* mom1, T* mom1_out, const T* mom2,
              T* mom2_out, const T* lr, const T* grad, const T* param)
      : beta1_(beta1),
        beta2_(beta2),
        epsilon_(epsilon),
        beta1_pow_(beta1_pow),
        beta2_pow_(beta2_pow),
        moment1_(mom1),
        moment1_out_(mom1_out),
        moment2_(mom2),
        moment2_out_(mom2_out),
        lr_(lr),
        grad_(grad),
        param_(param) {}

  // Maps param[i] to param_out[i] and writes moment1_out_[i] / moment2_out_[i].
  inline HOSTDEVICE T operator()(const T& p) const {
    // Recover the element position from the argument's address.
    const size_t idx = &p - param_;

    // Perform every load up front, before any store is issued.
    const T grad_val = grad_[idx];
    const T m1_prev = moment1_[idx];
    const T m2_prev = moment2_[idx];
    const T base_lr = *lr_;
    const T b1_pow = *beta1_pow_;
    const T b2_pow = *beta2_pow_;

    // Learning rate scaled by sqrt(1 - beta2_pow) / (1 - beta1_pow).
    const T step = base_lr * sqrt(1 - b2_pow) / (1 - b1_pow);
    // Blend the previous moments with the gradient and its square.
    const T m1_next = beta1_ * m1_prev + (1 - beta1_) * grad_val;
    const T m2_next = beta2_ * m2_prev + (1 - beta2_) * grad_val * grad_val;
    const T updated = p - step * (m1_next / (sqrt(m2_next) + epsilon_));

    // Store the refreshed moments; the new parameter value is returned so the
    // caller decides where it is written.
    moment1_out_[idx] = m1_next;
    moment2_out_[idx] = m2_next;
    return updated;
  }
};
// Operator kernel that applies one Adam step to a parameter tensor.
//
// Inputs read from the execution context: "Param", "Grad", "Moment1",
// "Moment2", "LearningRate", "Beta1Pow", "Beta2Pow".
// Outputs written: "ParamOut", "Moment1Out", "Moment2Out".
// Attributes read: "beta1", "beta2", "epsilon" (stored as float, cast to T).
//
// The arithmetic lives in AdamFunctor; this kernel only gathers the raw
// buffers and runs the functor over every parameter element through
// platform::Transform. There is a single implementation path: the update is
// computed once, and each tensor is bound to exactly one local name.
template <typename DeviceContext, typename T>
class AdamOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using paddle::framework::LoDTensor;
    using paddle::operators::detail::Ref;

    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
    T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));

    // Ref() turns each tensor pointer into a reference; the string is the
    // message associated with a missing tensor.
    auto& param = Ref(ctx.Input<LoDTensor>("Param"), "Must set Param");
    auto& grad = Ref(ctx.Input<LoDTensor>("Grad"), "Must set Grad");
    auto& mom1 = Ref(ctx.Input<LoDTensor>("Moment1"), "Must set Moment1");
    auto& mom2 = Ref(ctx.Input<LoDTensor>("Moment2"), "Must set Moment2");
    auto& lr =
        Ref(ctx.Input<LoDTensor>("LearningRate"), "Must set LearningRate");

    auto& beta1_pow =
        Ref(ctx.Input<LoDTensor>("Beta1Pow"), "Must set Beta1Pow");
    auto& beta2_pow =
        Ref(ctx.Input<LoDTensor>("Beta2Pow"), "Must set Beta2Pow");

    auto& param_out =
        Ref(ctx.Output<LoDTensor>("ParamOut"), "Must set ParamOut");
    auto& mom1_out =
        Ref(ctx.Output<LoDTensor>("Moment1Out"), "Must set Moment1Out");
    // The message names Moment2Out so a missing output is reported correctly.
    auto& mom2_out =
        Ref(ctx.Output<LoDTensor>("Moment2Out"), "Must set Moment2Out");

    // mutable_data() is what obtains the writable output buffers, so the
    // moment outputs are requested here and ParamOut just below.
    AdamFunctor<T> functor(beta1, beta2, epsilon, beta1_pow.template data<T>(),
                           beta2_pow.template data<T>(),
                           mom1.template data<T>(),
                           mom1_out.template mutable_data<T>(ctx.GetPlace()),
                           mom2.template data<T>(),
                           mom2_out.template mutable_data<T>(ctx.GetPlace()),
                           lr.template data<T>(), grad.template data<T>(),
                           param.template data<T>());

    const T* in_ptr = param.template data<T>();
    T* out_ptr = param_out.template mutable_data<T>(ctx.GetPlace());

    // Apply the functor to every element of Param, writing into ParamOut.
    // The element count is taken from param_out.
    platform::Transform<DeviceContext> trans;
    trans(static_cast<const DeviceContext&>(ctx.device_context()), in_ptr,
          in_ptr + param_out.numel(), out_ptr, functor);
  }
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录