Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
73c5c4a4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
73c5c4a4
编写于
10月 19, 2017
作者:
Q
qijun
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'baidu/develop' into test_book_1
上级
5c68765a
1f1be6c9
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
146 additions
and
86 deletions
+146
-86
paddle/operators/adam_op.cc
paddle/operators/adam_op.cc
+1
-11
paddle/operators/adam_op.h
paddle/operators/adam_op.h
+2
-11
paddle/operators/adamax_op.cc
paddle/operators/adamax_op.cc
+1
-6
paddle/operators/adamax_op.h
paddle/operators/adamax_op.h
+1
-6
python/paddle/v2/framework/framework.py
python/paddle/v2/framework/framework.py
+3
-1
python/paddle/v2/framework/layers.py
python/paddle/v2/framework/layers.py
+49
-3
python/paddle/v2/framework/nets.py
python/paddle/v2/framework/nets.py
+24
-0
python/paddle/v2/framework/tests/test_adam_op.py
python/paddle/v2/framework/tests/test_adam_op.py
+12
-18
python/paddle/v2/framework/tests/test_adamax_op.py
python/paddle/v2/framework/tests/test_adamax_op.py
+13
-19
python/paddle/v2/framework/tests/test_layers.py
python/paddle/v2/framework/tests/test_layers.py
+40
-11
未找到文件。
paddle/operators/adam_op.cc
浏览文件 @
73c5c4a4
...
...
@@ -43,10 +43,6 @@ class AdamOp : public framework::OperatorWithKernel {
"Output(Moment1Out) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Moment2Out"
),
"Output(Moment2Out) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Beta1PowOut"
),
"Output(Beta1PowOut) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Beta2PowOut"
),
"Output(Beta2PowOut) of AdamOp should not be null."
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
...
...
@@ -72,8 +68,6 @@ class AdamOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment1Out"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment2Out"
,
param_dims
);
ctx
->
SetOutputDim
(
"Beta1PowOut"
,
beta1_pow_dims
);
ctx
->
SetOutputDim
(
"Beta2PowOut"
,
beta2_pow_dims
);
}
};
...
...
@@ -92,8 +86,6 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"ParamOut"
,
"(Tensor) Output parameter"
);
AddOutput
(
"Moment1Out"
,
"(Tensor) Output first moment"
);
AddOutput
(
"Moment2Out"
,
"(Tensor) Output second moment"
);
AddOutput
(
"Beta1PowOut"
,
"(Tensor) Output beta1 power accumulator"
);
AddOutput
(
"Beta2PowOut"
,
"(Tensor) Output beta2 power accumulator"
);
AddAttr
<
float
>
(
"beta1"
,
"(float, default 0.9) "
...
...
@@ -121,10 +113,8 @@ Adam updates:
moment1_out = beta1 * moment1 + (1 − beta1) * grad
moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad
beta1_pow_out = beta1_pow * beta1
beta2_pow_out = beta2_pow * beta2
learning_rate_t = learning_rate_t *
sqrt(1 - beta2_pow
_out) / (1 - beta1_pow_out
)
sqrt(1 - beta2_pow
) / (1 - beta1_pow
)
param_out = param - learning_rate_t * moment1/ (sqrt(moment2) + epsilon)
References:
...
...
paddle/operators/adam_op.h
浏览文件 @
73c5c4a4
...
...
@@ -26,14 +26,10 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment1_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Moment1Out"
);
auto
moment2_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Moment2Out"
);
auto
beta1_pow_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Beta1PowOut"
);
auto
beta2_pow_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Beta2PowOut"
);
param_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment1_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment2_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
beta1_pow_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
beta2_pow_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
float
beta1
=
ctx
.
Attr
<
float
>
(
"beta1"
);
float
beta2
=
ctx
.
Attr
<
float
>
(
"beta2"
);
...
...
@@ -56,18 +52,13 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto
param_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out_tensor
);
auto
moment1_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment1_out_tensor
);
auto
moment2_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment2_out_tensor
);
auto
beta1_pow_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
beta1_pow_out_tensor
);
auto
beta2_pow_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
beta2_pow_out_tensor
);
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
moment1_out
.
device
(
place
)
=
beta1
*
moment1
+
(
1
-
beta1
)
*
grad
;
moment2_out
.
device
(
place
)
=
beta2
*
moment2
+
(
1
-
beta2
)
*
grad
.
square
();
beta1_pow_out
.
device
(
place
)
=
beta1_pow
*
beta1
;
beta2_pow_out
.
device
(
place
)
=
beta2_pow
*
beta2
;
// All of these are tensors of 1 element
auto
lr_t
=
lr
*
(
1
-
beta2_pow
_out
).
sqrt
()
/
(
1
-
beta1_pow_out
);
auto
lr_t
=
lr
*
(
1
-
beta2_pow
).
sqrt
()
/
(
1
-
beta1_pow
);
// Eigen does not support automatic broadcast
// Get dimensions of moment vector to broadcast lr_t
Eigen
::
DSizes
<
int
,
1
>
m_dsize
(
moment1_out_tensor
->
numel
());
...
...
paddle/operators/adamax_op.cc
浏览文件 @
73c5c4a4
...
...
@@ -41,8 +41,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
"Output(MomentOut) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"InfNormOut"
),
"Output(InfNormOut) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Beta1PowOut"
),
"Output(Beta1PowOut) of AdamaxOp should not be null."
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
...
...
@@ -64,7 +62,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"InfNormOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"Beta1PowOut"
,
beta1_pow_dims
);
}
};
...
...
@@ -86,7 +83,6 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"InfNormOut"
,
"(Tensor) "
"Output exponentially weighted infinity norm"
);
AddOutput
(
"Beta1PowOut"
,
"(Tensor) Output beta1 power accumulator"
);
AddAttr
<
float
>
(
"beta1"
,
"(float, default 0.9) "
...
...
@@ -113,8 +109,7 @@ Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad))
beta1_pow_out = beta1_pow * beta1
learning_rate_t = learning_rate/(1 - beta1_pow_out)
learning_rate_t = learning_rate/(1 - beta1_pow)
param_out = param - learning_rate_t * moment_out/inf_norm_out
The original paper does not have an epsilon attribute.
...
...
paddle/operators/adamax_op.h
浏览文件 @
73c5c4a4
...
...
@@ -26,12 +26,10 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
inf_norm_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"InfNormOut"
);
auto
beta1_pow_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Beta1PowOut"
);
param_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
inf_norm_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
beta1_pow_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
float
beta1
=
ctx
.
Attr
<
float
>
(
"beta1"
);
float
beta2
=
ctx
.
Attr
<
float
>
(
"beta2"
);
...
...
@@ -53,15 +51,12 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
auto
moment_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment_out_tensor
);
auto
inf_norm_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
inf_norm_out_tensor
);
auto
beta1_pow_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
beta1_pow_out_tensor
);
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
moment_out
.
device
(
place
)
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
;
inf_norm_out
.
device
(
place
)
=
grad
.
abs
().
cwiseMax
((
beta2
*
inf_norm
)
+
epsilon
);
beta1_pow_out
.
device
(
place
)
=
beta1_pow
*
beta1
;
auto
lr_t
=
lr
/
(
1
-
beta1_pow_out
);
auto
lr_t
=
lr
/
(
1
-
beta1_pow
);
Eigen
::
DSizes
<
int
,
1
>
m_dsize
(
moment_out_tensor
->
numel
());
param_out
.
device
(
place
)
=
param
-
lr_t
.
broadcast
(
m_dsize
)
*
(
moment_out
/
inf_norm_out
);
...
...
python/paddle/v2/framework/framework.py
浏览文件 @
73c5c4a4
...
...
@@ -432,11 +432,13 @@ class Program(object):
def
current_block
(
self
):
return
self
.
blocks
[
self
.
current_block_idx
]
def
append_backward
(
self
,
target
,
no_grad_set
):
def
append_backward
(
self
,
target
,
no_grad_set
=
None
):
"""
return map(param_name -> (grad_name, block_index, op_index))
"""
assert
isinstance
(
target
,
Variable
)
if
no_grad_set
is
None
:
no_grad_set
=
set
()
param_to_grad_info
=
self
.
desc
.
append_backward
(
target
.
desc
,
no_grad_set
)
self
.
sync_with_cpp
()
return
param_to_grad_info
...
...
python/paddle/v2/framework/layers.py
浏览文件 @
73c5c4a4
...
...
@@ -3,7 +3,7 @@ import paddle.v2.framework.core as core
from
paddle.v2.framework.framework
import
OpProtoHolder
,
Variable
import
re
__all__
=
[
'fc'
,
'data'
,
'cross_entropy'
,
'conv2d'
]
__all__
=
[
'fc'
,
'data'
,
'cross_entropy'
,
'conv2d'
,
'pool2d'
]
def
fc
(
input
,
...
...
@@ -35,7 +35,10 @@ def fc(input,
"Y"
:
w
,
},
outputs
=
{
"Out"
:
tmp
},
attrs
=
{
'x_num_col_dims'
:
num_flatten_dims
})
attrs
=
{
'x_num_col_dims'
:
num_flatten_dims
,
'y_num_col_dims'
:
len
(
input_shape
)
-
num_flatten_dims
})
mul_results
.
append
(
tmp
)
# sum
...
...
@@ -115,7 +118,6 @@ def _create_op_func_(op_type):
_create_op_func_
(
'mean'
)
_create_op_func_
(
'mul'
)
_create_op_func_
(
'pool2d'
)
def
cross_entropy
(
input
,
label
,
**
kwargs
):
...
...
@@ -170,6 +172,13 @@ def conv2d(input,
raise
ValueError
(
"num_channels must be divisible by groups."
)
num_filter_channels
=
num_channels
/
groups
if
isinstance
(
filter_size
,
int
):
filter_size
=
[
filter_size
,
filter_size
]
if
isinstance
(
stride
,
int
):
stride
=
[
stride
,
stride
]
if
isinstance
(
padding
,
int
):
padding
=
[
padding
,
padding
]
input_shape
=
input
.
shape
filter_shape
=
[
num_filters
,
num_filter_channels
]
+
filter_size
filter
=
helper
.
create_parameter
(
...
...
@@ -190,3 +199,40 @@ def conv2d(input,
pre_act
=
helper
.
append_bias_op
(
pre_bias
)
return
helper
.
append_activation
(
pre_act
)
def pool2d(input,
           pool_size,
           pool_type,
           pool_stride=[1, 1],
           pool_padding=[0, 0],
           global_pooling=False,
           program=None):
    """Append a ``pool2d`` operator to the program and return its output.

    Args:
        input: variable passed to the operator as input ``X``.
        pool_size: pooling window size; an int is expanded to
            ``[pool_size, pool_size]`` and sent as the ``ksize`` attribute.
        pool_type: ``"max"`` or ``"avg"``; sent as ``pooling_type``.
        pool_stride: stride; an int is expanded to a two-element list and
            sent as the ``strides`` attribute.
        pool_padding: padding; an int is expanded to a two-element list and
            sent as the ``paddings`` attribute.
        global_pooling: forwarded unchanged as the ``global_pooling``
            attribute.
        program: forwarded to ``LayerHelper`` through ``**locals()``.

    Returns:
        The temporary variable that receives the operator output ``Out``.

    Raises:
        ValueError: if ``pool_type`` is neither ``"max"`` nor ``"avg"``.
    """
    if pool_type not in ["max", "avg"]:
        # Use '%' so the offending value is actually interpolated into the
        # message; passing it as a second argument left '%s' unformatted.
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))

    # Normalise scalar arguments to [height, width] pairs. These rebind the
    # names, so the list defaults in the signature are not modified here.
    if isinstance(pool_size, int):
        pool_size = [pool_size, pool_size]
    if isinstance(pool_stride, int):
        pool_stride = [pool_stride, pool_stride]
    if isinstance(pool_padding, int):
        pool_padding = [pool_padding, pool_padding]

    # NOTE: **locals() forwards every local defined so far as a keyword
    # argument, so no extra local variables may be introduced above this line.
    # The layer type is 'pool2d' (it was previously the copy-pasted 'conv2d').
    helper = LayerHelper('pool2d', **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_tmp_variable(dtype)

    helper.append_op(
        type="pool2d",
        inputs={"X": input},
        outputs={"Out": pool_out},
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "global_pooling": global_pooling,
            "strides": pool_stride,
            "paddings": pool_padding
        })

    return pool_out
python/paddle/v2/framework/nets.py
0 → 100644
浏览文件 @
73c5c4a4
import
paddle.v2.framework.layers
as
layers
def simple_img_conv_pool(input,
                         filter_size,
                         num_filters,
                         pool_size,
                         pool_stride,
                         act,
                         program=None):
    """Chain a ``conv2d`` layer and a max ``pool2d`` layer.

    The convolution receives ``input``, ``num_filters``, ``filter_size`` and
    ``act``; its result is then pooled with ``pool_type='max'`` using
    ``pool_size`` and ``pool_stride``. ``program`` is forwarded to both
    layers. The pooled variable is returned.
    """
    convolved = layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        act=act,
        program=program)

    # Pooling type is fixed to 'max' for this helper.
    return layers.pool2d(
        input=convolved,
        pool_size=pool_size,
        pool_type='max',
        pool_stride=pool_stride,
        program=program)
python/paddle/v2/framework/tests/test_adam_op.py
浏览文件 @
73c5c4a4
...
...
@@ -33,14 +33,12 @@ class TestAdamOp1(OpTest):
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'beta1'
:
beta1
,
'beta2'
:
beta2
}
param_out
,
moment1_out
,
moment2_out
,
beta1_pow_out
,
\
beta2_pow
_out
=
adam_step
(
self
.
inputs
,
self
.
attrs
)
param_out
,
moment1_out
,
\
moment2
_out
=
adam_step
(
self
.
inputs
,
self
.
attrs
)
self
.
outputs
=
{
'Moment1Out'
:
moment1_out
,
'Moment2Out'
:
moment2_out
,
'Beta1PowOut'
:
beta1_pow_out
,
'Beta2PowOut'
:
beta2_pow_out
,
'ParamOut'
:
param_out
}
...
...
@@ -78,14 +76,12 @@ class TestAdamOp2(OpTest):
attributes
=
{
'epsilon'
:
epsilon
,
'beta1'
:
beta1
,
'beta2'
:
beta2
}
param_out
,
moment1_out
,
moment2_out
,
beta1_pow_out
,
\
beta2_pow
_out
=
adam_step
(
self
.
inputs
,
attributes
)
param_out
,
moment1_out
,
\
moment2
_out
=
adam_step
(
self
.
inputs
,
attributes
)
self
.
outputs
=
{
'Moment1Out'
:
moment1_out
,
'Moment2Out'
:
moment2_out
,
'Beta1PowOut'
:
beta1_pow_out
,
'Beta2PowOut'
:
beta2_pow_out
,
'ParamOut'
:
param_out
}
...
...
@@ -127,14 +123,12 @@ class TestAdamOpMultipleSteps(OpTest):
def
test_check_output
(
self
):
for
_
in
range
(
self
.
num_steps
):
param_out
,
moment1_out
,
moment2_out
,
beta1_pow_out
,
\
beta2_pow
_out
=
adam_step
(
self
.
inputs
,
self
.
attrs
)
param_out
,
moment1_out
,
\
moment2
_out
=
adam_step
(
self
.
inputs
,
self
.
attrs
)
self
.
outputs
=
{
'Moment1Out'
:
moment1_out
,
'Moment2Out'
:
moment2_out
,
'Beta1PowOut'
:
beta1_pow_out
,
'Beta2PowOut'
:
beta2_pow_out
,
'ParamOut'
:
param_out
}
...
...
@@ -145,8 +139,10 @@ class TestAdamOpMultipleSteps(OpTest):
self
.
inputs
[
'Param'
]
=
param_out
self
.
inputs
[
'Moment1'
]
=
moment1_out
self
.
inputs
[
'Moment2'
]
=
moment2_out
self
.
inputs
[
'Beta1Pow'
]
=
beta1_pow_out
self
.
inputs
[
'Beta2Pow'
]
=
beta2_pow_out
# Update powers of Beta1 and Beta2 for next time step. Each accumulator is
# scaled by its own decay rate (Beta2Pow was previously multiplied by beta1).
self.inputs['Beta1Pow'] *= self.attrs['beta1']
self.inputs['Beta2Pow'] *= self.attrs['beta2']
# Randomize gradient for next step
self
.
inputs
[
'Grad'
]
=
np
.
random
.
uniform
(
...
...
@@ -175,11 +171,9 @@ def adam_step(inputs, attributes):
moment1_out
=
beta1
*
moment1
+
(
1
-
beta1
)
*
grad
moment2_out
=
beta2
*
moment2
+
(
1
-
beta2
)
*
np
.
square
(
grad
)
beta1_pow_out
=
beta1_pow
*
beta1
beta2_pow_out
=
beta2_pow
*
beta2
lr_t
=
lr
*
np
.
sqrt
(
1
-
beta2_pow_out
)
/
(
1
-
beta1_pow_out
)
lr_t
=
lr
*
np
.
sqrt
(
1
-
beta2_pow
)
/
(
1
-
beta1_pow
)
param_out
=
param
-
lr_t
*
(
moment1_out
/
(
np
.
sqrt
(
moment2_out
)
+
epsilon
))
return
param_out
,
moment1_out
,
moment2_out
,
beta1_pow_out
,
beta2_pow_out
return
param_out
,
moment1_out
,
moment2_out
if
__name__
==
"__main__"
:
...
...
python/paddle/v2/framework/tests/test_adamax_op.py
浏览文件 @
73c5c4a4
...
...
@@ -31,14 +31,13 @@ class TestAdamaxOp1(OpTest):
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'Beta1PowOut'
:
beta1_pow_out
'InfNormOut'
:
inf_norm_out
}
def
test_check_output
(
self
):
...
...
@@ -73,14 +72,12 @@ class TestAdamaxOp2(OpTest):
}
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
self
.
inputs
,
attrs
)
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
attrs
)
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'Beta1PowOut'
:
beta1_pow_out
'InfNormOut'
:
inf_norm_out
}
def
test_check_output
(
self
):
...
...
@@ -117,19 +114,15 @@ class TestAdamaxOpMultipleSteps(OpTest):
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
def
test_check_output
(
self
):
for
_
in
range
(
self
.
num_steps
):
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'Beta1PowOut'
:
beta1_pow_out
'InfNormOut'
:
inf_norm_out
}
# Verify output for this step
...
...
@@ -139,7 +132,9 @@ class TestAdamaxOpMultipleSteps(OpTest):
self
.
inputs
[
'Param'
]
=
param_out
self
.
inputs
[
'Moment'
]
=
moment_out
self
.
inputs
[
'InfNorm'
]
=
inf_norm_out
self
.
inputs
[
'Beta1Pow'
]
=
beta1_pow_out
# Update Beta1 Power accumulator for next step
self
.
inputs
[
'Beta1Pow'
]
*=
self
.
attrs
[
'beta1'
]
# Randomize gradient for next step
self
.
inputs
[
'Grad'
]
=
np
.
random
.
uniform
(
...
...
@@ -167,11 +162,10 @@ def adamax_step(inputs, attributes):
moment_out
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
inf_norm_out
=
np
.
maximum
(
beta2
*
inf_norm
+
epsilon
,
np
.
abs
(
grad
))
beta1_pow_out
=
beta1_pow
*
beta1
lr_t
=
(
lr
/
(
1
-
beta1_pow_out
))
lr_t
=
(
lr
/
(
1
-
beta1_pow
))
param_out
=
param
-
lr_t
*
np
.
divide
(
moment_out
,
inf_norm_out
)
return
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
return
param_out
,
moment_out
,
inf_norm_out
if
__name__
==
"__main__"
:
...
...
python/paddle/v2/framework/tests/test_layers.py
浏览文件 @
73c5c4a4
import
paddle.v2.framework.layers
as
layers
import
paddle.v2.framework.nets
as
nets
from
paddle.v2.framework.framework
import
Program
,
g_program
import
paddle.v2.framework.core
as
core
import
unittest
...
...
@@ -18,7 +19,7 @@ class TestBook(unittest.TestCase):
avg_cost
=
layers
.
mean
(
x
=
cost
,
program
=
program
)
self
.
assertIsNotNone
(
avg_cost
)
program
.
append_backward
(
avg_cost
,
set
()
)
program
.
append_backward
(
avg_cost
)
print
str
(
program
)
def
test_recognize_digits_mlp
(
self
):
...
...
@@ -38,24 +39,52 @@ class TestBook(unittest.TestCase):
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
,
program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
program
=
program
)
self
.
assertIsNotNone
(
avg_cost
)
#
print str(program)
print
str
(
program
)
def
test_simple_conv2d
(
self
):
pd
=
core
.
ProgramDesc
.
__create_program_desc__
()
program
=
Program
(
desc
=
pd
)
images
=
data_layer
(
program
=
Program
()
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
48
,
48
],
data_type
=
'int32'
,
program
=
program
)
conv2d_layer
(
layers
.
conv2d
(
input
=
images
,
num_filters
=
3
,
filter_size
=
[
4
,
4
],
program
=
program
)
#
print str(program)
print
str
(
program
)
def
test_
simple_conv2d
(
self
):
def
test_
recognize_digits_conv
(
self
):
program
=
Program
()
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
48
,
48
],
data_type
=
'int32'
,
program
=
program
)
layers
.
conv2d
(
input
=
images
,
num_filters
=
3
,
filter_size
=
[
4
,
4
],
program
=
program
)
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
,
program
=
program
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int32'
,
program
=
program
)
conv_pool_1
=
nets
.
simple_img_conv_pool
(
input
=
images
,
filter_size
=
5
,
num_filters
=
2
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
,
program
=
program
)
conv_pool_2
=
nets
.
simple_img_conv_pool
(
input
=
conv_pool_1
,
filter_size
=
5
,
num_filters
=
4
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
,
program
=
program
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
,
program
=
program
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
,
program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
program
=
program
)
program
.
append_backward
(
avg_cost
)
print
str
(
program
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录