PaddlePaddle/Paddle · Commit 330c509b
Authored Oct 17, 2017 by qijun
Merge remote-tracking branch 'baidu/develop' into sgd_op_sparse_kernel
Parents: 182ce51c, 46f7854e
Showing 9 changed files with 303 additions and 69 deletions (+303 −69)
paddle/framework/backward.cc                         +28   −6
paddle/framework/backward_test.cc                    +64   −17
paddle/gserver/gradientmachines/NeuralNetwork.h      +1    −1
paddle/operators/mul_op.cc                           +4    −4
paddle/pybind/protobuf.cc                            +5    −0
python/paddle/v2/framework/framework.py              +16   −1
python/paddle/v2/framework/optimizer.py              +124  −0
python/paddle/v2/framework/tests/test_optimizer.py   +31   −0
python/paddle/v2/framework/tests/test_program.py     +30   −40
paddle/framework/backward.cc

@@ -281,12 +281,16 @@ static void CreateGradVarInBlock(
   auto ops = block_desc->AllOps();
   for (size_t op_index = grad_op_start_index; op_index < ops.size();
        ++op_index) {
+    bool need_infer_shape = false;
     ForEachVarName(ops[op_index]->Outputs(),
                    [&](const std::string& grad_var_name) {
                      if (block_desc->HasVar(grad_var_name)) {
                        return false;
                      }
-                     block_desc->Var(grad_var_name);
+                     need_infer_shape = true;
+                     auto var = block_desc->Var(grad_var_name);
+                     // FIXME(qiao) infer the datatype
+                     var->SetDataType(framework::DataType::FP32);
                      auto it = param_name_map.find(grad_var_name);
                      if (it == param_name_map.end()) {
                        return false;
@@ -298,6 +302,9 @@ static void CreateGradVarInBlock(
                      grad_record.op_idx_ = static_cast<int>(op_index);
                      return false; /* not break */
                    });
+    if (need_infer_shape) {
+      ops[op_index]->InferShape(*block_desc);
+    }
   }
 }
@@ -428,10 +435,16 @@ ParamGradInfoMap AppendBackward(
   auto& all_ops = root_block->ops_;

   // insert fill one op for target
+  // TODO(qiao) add some check to the target.
   std::string fill_one_op_out = GradVarName(target.Name());
+  std::vector<int64_t> target_shape_desc = target.Shape();
+  std::vector<int> target_shape;
+  std::transform(target_shape_desc.begin(), target_shape_desc.end(),
+                 std::back_inserter(target_shape),
+                 [](int64_t dim) { return static_cast<int>(dim); });
   std::unique_ptr<OpDescBind> fill_one_op(
       new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
-                     {{"shape", std::vector<int>{1}},
+                     {{"shape", target_shape},
                       {"value", static_cast<float>(1.0)},
                       {"data_type", framework::DataType::FP32}}));
   all_ops.push_back(std::move(fill_one_op));
@@ -443,13 +456,22 @@ ParamGradInfoMap AppendBackward(
   auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
                                              &no_grad_var_names, &grad_to_var);
-  std::unordered_map<std::string, GradVarInfo> retv;
-
-  // Create Variable
   for (auto& ptr : backward_op_descs) {
     all_ops.push_back(std::move(ptr));
   }
-  root_block->Var(fill_one_op_out);
+  // Create Variable
+
+  // Create target gradient variable
+  std::unordered_map<std::string, GradVarInfo> retv;
+
+  auto var = root_block->Var(fill_one_op_out);
+  // FIXME(qiao) infer the data type
+  var->SetDataType(framework::DataType::FP32);
+  var->SetShape(target.Shape());
+  auto& target_grad = retv[target.Name()];
+  target_grad.name_ = fill_one_op_out;
+  target_grad.block_idx_ = root_block_idx;
+  target_grad.op_idx_ = static_cast<int>(forward_op_num);

   // create grad_var for all blocks in this program
   CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
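The AppendBackward change above seeds the backward pass: the appended fill_constant op now writes a tensor of ones with the target's full shape (rather than shape {1}) into GradVarName(target), and the target's gradient variable is recorded in the returned map. Roughly the same op, expressed with the Python Block API that this commit extends (an illustrative sketch only, not the actual C++ path; `block`, `target_shape`, and `target_grad_name` are assumed names):

```python
# Sketch: the gradient seed that AppendBackward creates for `target`.
target_grad = block.create_var(
    dtype="float32", shape=target_shape, lod_level=0, name=target_grad_name)
block.append_op(
    type="fill_constant",
    outputs={"Out": target_grad},
    attrs={"shape": target_shape,   # was [1] before this change
           "value": 1.0})           # d(target)/d(target) == 1
```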
paddle/framework/backward_test.cc

@@ -26,6 +26,20 @@ namespace framework {
 using DeviceContext = platform::DeviceContext;

+class NoneOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext *ctx) const override {}
+};
+
+template <typename Place, typename T>
+class NoneKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {}
+};
+
 class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
  public:
   RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
@@ -215,19 +229,51 @@ class MinusOpMaker : public OpProtoAndCheckerMaker {
 namespace f = paddle::framework;
 namespace ops = paddle::operators;
 using EnforceNotMet = paddle::platform::EnforceNotMet;
-REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker,
+// rowwise_add
+REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
                   f::RowWiseAddGradMaker);
-REGISTER_OPERATOR(rowwise_add_grad, f::NOP);
-REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
-REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
-REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
-REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker);
-REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, f::NOP);
+REGISTER_OP_CPU_KERNEL(rowwise_add,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// mul
+REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(mul_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// sigmoid
+REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sigmoid,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
+// fill_zeros_like
+REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
+REGISTER_OP_CPU_KERNEL(fill_zeros_like,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// sum
+REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(sum_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// fc
 REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
-REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
-            f::NOP);
-REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad,
-            f::NOP);
-REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
+// many_output_op
+REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
+            many_output_op_grad, f::NoneOp);
+// mult_in_out
+REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
+            f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mult_in_out,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// minus
+REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
+REGISTER_OP_CPU_KERNEL(minus,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// scale
+REGISTER_OPERATOR(scale, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(scale,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);

 TEST(Backward, simple_op_not_need_grad) {
   auto fwd = f::OpRegistry::CreateOp(
@@ -463,6 +509,7 @@ TEST(Backward, simple_single_op) {
   f::ProgramDesc *program_desc = GetNewProgramDesc();
   f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
   f::BlockDescBind *block = program.Block(0);
   f::OpDescBind *op = block->AppendOp();
   op->SetType("rowwise_add");
   op->SetInput("X", {"x"});
@@ -487,7 +534,7 @@ TEST(Backward, simple_single_op) {
   EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b")}));

-  EXPECT_EQ(var_to_grad.size(), 2UL);
+  EXPECT_EQ(var_to_grad.size(), 3UL);
   EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
   EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
@@ -588,7 +635,7 @@ TEST(Backward, simple_mult_op) {
   EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b3")}));

-  EXPECT_EQ(var_to_grad.size(), 6UL);
+  EXPECT_EQ(var_to_grad.size(), 7UL);
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -666,7 +713,7 @@ TEST(Backward, intermedia_var_no_grad) {
             std::vector<std::string>({f::GradVarName("out1")}));
   EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());

-  EXPECT_EQ(var_to_grad.size(), 3UL);
+  EXPECT_EQ(var_to_grad.size(), 4UL);
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -744,7 +791,7 @@ TEST(Backward, var_no_grad) {
   EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
             std::vector<std::string>({f::GradVarName("h1")}));

-  EXPECT_EQ(var_to_grad.size(), 3UL);
+  EXPECT_EQ(var_to_grad.size(), 4UL);
   EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
   EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
@@ -830,7 +877,7 @@ TEST(Backward, shared_var) {
   EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b1")}));

-  EXPECT_EQ(var_to_grad.size(), 5UL);
+  EXPECT_EQ(var_to_grad.size(), 6UL);
   EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
   EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -863,7 +910,7 @@ TEST(Backward, half_backward) {
   auto ops = block->AllOps();
   ASSERT_EQ(3UL, ops.size());
-  EXPECT_EQ(var_to_grad.size(), 1UL);
+  EXPECT_EQ(var_to_grad.size(), 2UL);
   EXPECT_EQ(var_to_grad.at("a"),
             f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
 }
paddle/gserver/gradientmachines/NeuralNetwork.h

@@ -135,7 +135,7 @@ public:
   const std::string& getName() const { return subModelName_; }

   /// some finish work, like convert the weight format of MKLDNNLayers
-  void finish() override;
+  void finish();

 protected:
   /**
paddle/operators/mul_op.cc

@@ -104,10 +104,10 @@ class MulOpGrad : public framework::OperatorWithKernel {
     auto y_dims = ctx->GetInputDim("Y");
     auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    auto x_mat_dims =
-        framework::flatten_to_2d(x_dims, Attr<int>("x_num_col_dims"));
-    auto y_mat_dims =
-        framework::flatten_to_2d(y_dims, Attr<int>("y_num_col_dims"));
+    auto x_mat_dims = framework::flatten_to_2d(
+        x_dims, ctx->Attrs().Get<int>("x_num_col_dims"));
+    auto y_mat_dims = framework::flatten_to_2d(
+        y_dims, ctx->Attrs().Get<int>("y_num_col_dims"));

     PADDLE_ENFORCE_EQ(x_mat_dims[0], out_dims[0],
paddle/pybind/protobuf.cc

@@ -163,6 +163,11 @@ void BindBlockDesc(py::module &m) {
              return self.Var(name);
            },
            py::return_value_policy::reference)
+      .def("has_var",
+           [](BlockDescBind &self, py::bytes byte_name) {
+             std::string name = byte_name;
+             return self.HasVar(name);
+           })
       .def("find_var",
            [](BlockDescBind &self, py::bytes byte_name) {
              std::string name = byte_name;
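The new `has_var` binding exposes `BlockDescBind::HasVar`, so Python code can probe a block for a variable before fetching it; `Optimizer.create_backward_pass` below leans on exactly this check. A minimal sketch of the call, assuming `block` is a `framework.Block` whose underlying desc is `block.desc` and `grad_name` is a hypothetical gradient variable name:

```python
# Sketch only: probe the bound BlockDesc before touching the variable.
if block.desc.has_var(grad_name):   # binding added in this commit
    grad_desc = block.desc.find_var(grad_name)
```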
python/paddle/v2/framework/framework.py

@@ -306,6 +306,14 @@ class Block(object):
     def idx(self):
         return self.desc.id

+    def var(self, name):
+        if name not in self.vars:
+            raise ValueError("var %s not in this block" % name)
+        return self.vars[name]
+
+    def all_parameters(self):
+        return {v for k, v in self.vars.iteritems() if isinstance(v, Parameter)}
+
     def create_var(self, *args, **kwargs):
         return Variable(self, *args, **kwargs)
@@ -314,7 +322,8 @@ class Block(object):
     def create_parameter(self, *args, **kwargs):
         global_block = self.program.global_block()
-        return Parameter(global_block, *args, **kwargs)
+        param = Parameter(global_block, *args, **kwargs)
+        return param

     def append_op(self, *args, **kwargs):
         op_desc = self.desc.append_op()
@@ -392,10 +401,16 @@ class Program(object):
     def global_block(self):
         return self.blocks[0]

+    def block(self, index):
+        return self.blocks[index]
+
     def current_block(self):
         return self.blocks[self.current_block_idx]

     def append_backward(self, target, no_grad_set):
+        """
+        return map(param_name -> (grad_name, block_index, op_index))
+        """
         assert isinstance(target, Variable)
         param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set)
         self.sync_with_cpp()
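Per the docstring above, `Program.append_backward` returns a map from each parameter name to a `(grad_name, block_index, op_index)` triple. A small sketch of consuming that map, assuming `program` is a `framework.Program` and `loss` is a `framework.Variable`; the parameter name is illustrative, and `Optimizer.create_backward_pass` in the new optimizer.py below performs the same lookup:

```python
# Sketch only: resolve one parameter's gradient from the returned map.
param_to_grad_info = program.append_backward(loss, set())   # empty no_grad_set
grad_info = param_to_grad_info["mul.x"]                     # (grad_name, block_index, op_index)
grad_block = program.block(grad_info[1])                    # Program.block() added in this commit
if grad_block.has_var(grad_info[0]):                        # relies on the new has_var binding
    grad_var = grad_block.var(grad_info[0])                 # Block.var() added in this commit
```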
python/paddle/v2/framework/optimizer.py (new file, mode 100644)

import paddle.v2.framework.framework as framework

__all__ = ['SGDOptimizer']


class Optimizer(object):
    """Optimizer Base class.

    Define the common interface of an optimizer.
    User should not use this class directly, but need to use
    one of it's implementation.
    """

    def __init__(self):
        pass

    def _append_optimize_op(self, block, param_and_grad):
        """ append optimize operator to block and return all the added optimize_op
        """
        raise NotImplementedError()

    def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None):
        """
        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
        for parameters in parameter_list

        Args:
          loss: an variable generated by cost function.
          no_grad_set: variable that should not create gradient
          parameter_list: parameters that need to compute gradient and update to
          optimize the lost.

        Returns:
          list of (parameters, gradients) pair.
        """
        assert isinstance(loss, framework.Variable)
        param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
                                                            set())
        if parameter_list is not None:
            parameters = parameter_list
        else:
            params = loss.block.program.global_block().all_parameters()
            parameters = [param.name for param in params]
        params_and_grads = []
        for param in parameters:
            if param not in param_grad_map:
                raise Exception("param %s is not in map" % param)
            grad_info = param_grad_map[param]
            grad_block = loss.block.program.block(grad_info[1])
            if not grad_block.has_var(grad_info[0]):
                raise Exception("grad block[%d] did not have grad var %s" %
                                grad_info[1], grad_info[0])
            param_var = loss.block.var(param)
            grad_var = grad_block.var(grad_info[0])
            if loss.block.has_var(grad_info[0]):
                params_and_grads.append((param_var, grad_var))
            else:
                params_and_grads.append((param_var, None))
        return params_and_grads

    def create_optimization_pass(self, parameters_and_grads, loss):
        """Add optimization operators to update gradients to variables.

        Args:
          loss: the target that this optimization is for.
          parameters_and_grads: a list of (variable, gradient) pair to update.

        Returns:
          optmization_op_list: a list of optimization operator that will update
          parameter using gradient.
        """
        optimize_ops = []
        for param_and_grad in parameters_and_grads:
            if param_and_grad[1] is not None:
                optimize_op = self._append_optimize_op(loss.block,
                                                       param_and_grad)
                optimize_ops.append(optimize_op)
        return optimize_ops

    def minimize(self, loss, parameter_list=None, no_grad_set=None):
        """Add operations to minimize `loss` by updating `parameter_list`.

        This method combines interface `create_backward_pass()` and
        `create_optimization_pass()` into one.
        """
        params_grads = self.create_backward_pass(loss, parameter_list,
                                                 no_grad_set or set())
        optimize_ops = self.create_optimization_pass(params_grads, loss)
        return optimize_ops


class SGDOptimizer(Optimizer):
    """ Simple SGD optimizer without any state.
    """

    def __init__(self, learning_rate):
        assert learning_rate is not None
        super(Optimizer, self).__init__()
        self.type = "sgd"
        self._learning_rate = learning_rate

    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)
        lr_shape = [1]
        # create a var for learning_rate
        lr = block.create_var(dtype="float32", shape=lr_shape, lod_level=0)

        # create an op to init the learning_rate
        init_op = block.append_op(
            type="fill_constant",
            outputs={"Out": lr},
            attrs={"shape": lr_shape,
                   "value": self._learning_rate})

        # create the optimize op
        sgd_op = block.append_op(
            type=self.type,
            inputs={"Param": param_and_grad[0],
                    "Grad": param_and_grad[1],
                    "LearningRate": lr},
            outputs={"ParamOut": param_and_grad[0]},
            attrs={"shape": [1],
                   "value": self._learning_rate})
        return sgd_op
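As the `minimize` docstring says, it is simply the composition of the two passes. A small sketch under the assumption that `sgd_optimizer` and `loss` are set up as in test_optimizer.py below (the intermediate names here are illustrative):

```python
# Sketch: what sgd_optimizer.minimize(loss) does in two explicit steps.
params_grads = sgd_optimizer.create_backward_pass(loss)     # append gradient ops, pair params with grads
update_ops = sgd_optimizer.create_optimization_pass(params_grads, loss)  # append one sgd op per pair
# update_ops is the same list that sgd_optimizer.minimize(loss) returns in one call.
```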
python/paddle/v2/framework/tests/test_optimizer.py (new file, mode 100644)

import unittest

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer


class TestOptimizer(unittest.TestCase):
    def test_sgd_optimizer(self):
        program = framework.g_program
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        mul_op = block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
        opts = sgd_optimizer.minimize(mul_out)
        self.assertEqual(len(opts), 1)
        sgd_op = opts[0]
        self.assertEqual(sgd_op.type, "sgd")


if __name__ == '__main__':
    unittest.main()
python/paddle/v2/framework/tests/test_program.py

@@ -34,49 +34,11 @@ class TestProgram(unittest.TestCase):
         self.assertEqual(1, b.idx)
         self.assertEqual(0, b.parent_idx)

-    def test_desc_append_backward(self):
-        prog = core.ProgramDesc.__create_program_desc__()
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
-        self.assertIsNotNone(block)
-
-        mul_op_desc = block.append_op()
-        mul_op_desc.set_type("mul")
-        mul_op_desc.set_input("X", ["x1"])
-        mul_op_desc.set_input("Y", ["y1"])
-        mul_op_desc.set_output("Out", ["out1"])
-
-        sum_op_desc = block.append_op()
-        sum_op_desc.set_type("elementwise_add")
-        sum_op_desc.set_input("X", ["out1"])
-        sum_op_desc.set_input("Y", ["b1"])
-        sum_op_desc.set_output("Out", ["out2"])
-
-        target = block.var("out2")
-
-        expect_ops = [
-            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
-            "mul_grad"
-        ]
-
-        def grad_name(name):
-            return name + "@GRAD"
-
-        actual_ops = []
-        param_to_grad = prog.append_backward(target, set())
-        for var_name in ("x1", "y1", "out1", "b1"):
-            self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
-            self.assertEqual(param_to_grad[var_name][1], 0)
-
-        for op in block.all_ops():
-            actual_ops.append(op.type())
-        self.assertEqual(actual_ops, expect_ops)
-
     def test_append_backward(self):
         prog = Program.instance()
         block = prog.global_block()

-        mul_x = block.create_parameter(
+        mul_x = block.create_var(
             dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
         mul_y = block.create_var(
             dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
@@ -88,7 +50,35 @@ class TestProgram(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": [mul_out]},
             attrs={"x_num_col_dims": 1})

-        param_to_grad = prog.append_backward(mul_out, set())
+        add_y = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="add.y")
+        add_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="add.out")
+        add_op = block.append_op(
+            type="elementwise_add",
+            inputs={"X": mul_out,
+                    "Y": add_y},
+            outputs={"Out": add_out},
+            attrs={"x_num_col_dims": 1})
+
+        param_to_grad = prog.append_backward(add_out, set())
+
+        def grad_name(name):
+            return name + "@GRAD"
+
+        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"):
+            self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
+            self.assertEqual(param_to_grad[var_name][1], 0)
+
+        expect_ops = [
+            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
+            "mul_grad"
+        ]
+        actual_ops = []
+        for op in block.ops:
+            actual_ops.append(op.type)
+        self.assertEqual(actual_ops, expect_ops)


 if __name__ == '__main__':