Commit 330c509b
Authored Oct 17, 2017 by qijun

    Merge remote-tracking branch 'baidu/develop' into sgd_op_sparse_kernel

Parents: 182ce51c, 46f7854e

Showing 9 changed files with 303 additions and 69 deletions (+303 −69).
paddle/framework/backward.cc                         +28  −6
paddle/framework/backward_test.cc                    +64  −17
paddle/gserver/gradientmachines/NeuralNetwork.h      +1   −1
paddle/operators/mul_op.cc                           +4   −4
paddle/pybind/protobuf.cc                            +5   −0
python/paddle/v2/framework/framework.py              +16  −1
python/paddle/v2/framework/optimizer.py              +124 −0
python/paddle/v2/framework/tests/test_optimizer.py   +31  −0
python/paddle/v2/framework/tests/test_program.py     +30  −40
paddle/framework/backward.cc

@@ -281,12 +281,16 @@ static void CreateGradVarInBlock(
   auto ops = block_desc->AllOps();
   for (size_t op_index = grad_op_start_index; op_index < ops.size();
        ++op_index) {
+    bool need_infer_shape = false;
     ForEachVarName(ops[op_index]->Outputs(),
                    [&](const std::string& grad_var_name) {
                      if (block_desc->HasVar(grad_var_name)) {
                        return false;
                      }
-                     block_desc->Var(grad_var_name);
+                     need_infer_shape = true;
+                     auto var = block_desc->Var(grad_var_name);
+                     // FIXME(qiao) infer the datatype
+                     var->SetDataType(framework::DataType::FP32);
                      auto it = param_name_map.find(grad_var_name);
                      if (it == param_name_map.end()) {
                        return false;
@@ -298,6 +302,9 @@ static void CreateGradVarInBlock(
                      grad_record.op_idx_ = static_cast<int>(op_index);
                      return false; /* not break */
                    });
+    if (need_infer_shape) {
+      ops[op_index]->InferShape(*block_desc);
+    }
   }
 }
@@ -428,10 +435,16 @@ ParamGradInfoMap AppendBackward(
   auto& all_ops = root_block->ops_;

   // insert fill one op for target
+  // TODO(qiao) add some check to the target.
   std::string fill_one_op_out = GradVarName(target.Name());
+  std::vector<int64_t> target_shape_desc = target.Shape();
+  std::vector<int> target_shape;
+  std::transform(target_shape_desc.begin(), target_shape_desc.end(),
+                 std::back_inserter(target_shape),
+                 [](int64_t dim) { return static_cast<int>(dim); });
   std::unique_ptr<OpDescBind> fill_one_op(
       new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
-                     {{"shape", std::vector<int>{1}},
+                     {{"shape", target_shape},
                       {"value", static_cast<float>(1.0)},
                       {"data_type", framework::DataType::FP32}}));
   all_ops.push_back(std::move(fill_one_op));
@@ -443,13 +456,22 @@ ParamGradInfoMap AppendBackward(
   auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
                                              &no_grad_var_names, &grad_to_var);
-  std::unordered_map<std::string, GradVarInfo> retv;
-
-  // Create Variable
   for (auto& ptr : backward_op_descs) {
     all_ops.push_back(std::move(ptr));
   }
-  root_block->Var(fill_one_op_out);
+  // Create Variable
+
+  // Create target gradient variable
+  std::unordered_map<std::string, GradVarInfo> retv;
+
+  auto var = root_block->Var(fill_one_op_out);
+  // FIXME(qiao) infer the data type
+  var->SetDataType(framework::DataType::FP32);
+  var->SetShape(target.Shape());
+  auto& target_grad = retv[target.Name()];
+  target_grad.name_ = fill_one_op_out;
+  target_grad.block_idx_ = root_block_idx;
+  target_grad.op_idx_ = static_cast<int>(forward_op_num);

   // create grad_var for all blocks in this program
   CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
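For orientation: the AppendBackward routine above is reached from Python through Program.append_backward (see the framework.py hunk later in this diff). A minimal sketch of that entry point, assuming `prog` is a Program whose global block already produces a target Variable `loss` (names are illustrative, not part of this commit):

    # returns a map: param_name -> (grad_name, block_index, op_index)
    param_to_grad = prog.append_backward(loss, set())

The fill_constant op inserted above seeds the backward pass with a tensor of ones for the target's gradient; the change replaces its hard-coded shape {1} with the target's actual shape.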
paddle/framework/backward_test.cc

@@ -26,6 +26,20 @@ namespace framework {
 using DeviceContext = platform::DeviceContext;

+class NoneOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext *ctx) const override {}
+};
+
+template <typename Place, typename T>
+class NoneKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {}
+};
+
 class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
  public:
   RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
@@ -215,19 +229,51 @@ class MinusOpMaker : public OpProtoAndCheckerMaker {
 namespace f = paddle::framework;
 namespace ops = paddle::operators;
 using EnforceNotMet = paddle::platform::EnforceNotMet;
-REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker,
+// rowwise_add
+REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
                   f::RowWiseAddGradMaker);
-REGISTER_OPERATOR(rowwise_add_grad, f::NOP);
-REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
-REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
-REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
-REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker);
-REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, f::NOP);
+REGISTER_OP_CPU_KERNEL(rowwise_add,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// mul
+REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(mul_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// sigmoid
+REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sigmoid,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
+// fill_zeros_like
+REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
+REGISTER_OP_CPU_KERNEL(fill_zeros_like,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// sum
+REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(sum_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// fc
 REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
-REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
-            f::NOP);
-REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad,
-            f::NOP);
-REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
+// many_output_op
+REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
+            many_output_op_grad, f::NoneOp);
+// mult_in_out
+REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
+            f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mult_in_out,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// minus
+REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
+REGISTER_OP_CPU_KERNEL(minus,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);
+// scale
+REGISTER_OPERATOR(scale, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(scale,
+                       f::NoneKernel<paddle::platform::CPUPlace, float>);

 TEST(Backward, simple_op_not_need_grad) {
   auto fwd = f::OpRegistry::CreateOp(
@@ -463,6 +509,7 @@ TEST(Backward, simple_single_op) {
   f::ProgramDesc *program_desc = GetNewProgramDesc();
   f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
   f::BlockDescBind *block = program.Block(0);
   f::OpDescBind *op = block->AppendOp();
   op->SetType("rowwise_add");
   op->SetInput("X", {"x"});
@@ -487,7 +534,7 @@ TEST(Backward, simple_single_op) {
   EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b")}));

-  EXPECT_EQ(var_to_grad.size(), 2UL);
+  EXPECT_EQ(var_to_grad.size(), 3UL);
   EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
   EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
@@ -588,7 +635,7 @@ TEST(Backward, simple_mult_op) {
   EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b3")}));

-  EXPECT_EQ(var_to_grad.size(), 6UL);
+  EXPECT_EQ(var_to_grad.size(), 7UL);
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -666,7 +713,7 @@ TEST(Backward, intermedia_var_no_grad) {
             std::vector<std::string>({f::GradVarName("out1")}));
   EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());

-  EXPECT_EQ(var_to_grad.size(), 3UL);
+  EXPECT_EQ(var_to_grad.size(), 4UL);
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -744,7 +791,7 @@ TEST(Backward, var_no_grad) {
   EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
             std::vector<std::string>({f::GradVarName("h1")}));

-  EXPECT_EQ(var_to_grad.size(), 3UL);
+  EXPECT_EQ(var_to_grad.size(), 4UL);
   EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
   EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
   EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
@@ -830,7 +877,7 @@ TEST(Backward, shared_var) {
   EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b1")}));

-  EXPECT_EQ(var_to_grad.size(), 5UL);
+  EXPECT_EQ(var_to_grad.size(), 6UL);
   EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
   EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
   EXPECT_EQ(var_to_grad.at("out1"),
@@ -863,7 +910,7 @@ TEST(Backward, half_backward) {
   auto ops = block->AllOps();
   ASSERT_EQ(3UL, ops.size());

-  EXPECT_EQ(var_to_grad.size(), 1UL);
+  EXPECT_EQ(var_to_grad.size(), 2UL);
   EXPECT_EQ(var_to_grad.at("a"),
             f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
 }
paddle/gserver/gradientmachines/NeuralNetwork.h

@@ -135,7 +135,7 @@ public:
   const std::string& getName() const { return subModelName_; }

   /// some finish work, like convert the weight format of MKLDNNLayers
-  void finish() override;
+  void finish();

 protected:
   /**
paddle/operators/mul_op.cc

@@ -104,10 +104,10 @@ class MulOpGrad : public framework::OperatorWithKernel {
     auto y_dims = ctx->GetInputDim("Y");
     auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
     auto x_mat_dims =
         framework::flatten_to_2d(
-            x_dims, Attr<int>("x_num_col_dims"));
+            x_dims, ctx->Attrs().Get<int>("x_num_col_dims"));
     auto y_mat_dims =
         framework::flatten_to_2d(
-            y_dims, Attr<int>("y_num_col_dims"));
+            y_dims, ctx->Attrs().Get<int>("y_num_col_dims"));

     PADDLE_ENFORCE_EQ(
         x_mat_dims[0], out_dims[0],
paddle/pybind/protobuf.cc

@@ -163,6 +163,11 @@ void BindBlockDesc(py::module &m) {
             return self.Var(name);
           },
           py::return_value_policy::reference)
+      .def("has_var",
+           [](BlockDescBind &self, py::bytes byte_name) {
+             std::string name = byte_name;
+             return self.HasVar(name);
+           })
       .def("find_var",
           [](BlockDescBind &self, py::bytes byte_name) {
             std::string name = byte_name;
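For context, this new binding is what exposes variable-existence checks to Python. A minimal sketch, assuming `block_desc` is a pybind-wrapped BlockDescBind obtained from a program descriptor (the variable name is illustrative):

    # Python 2 side; the bound has_var takes the variable name
    if block_desc.has_var("x@GRAD"):
        print("gradient variable already exists in this block")

The new optimizer.py below performs the same kind of check (grad_block.has_var(...)) before wiring a parameter to its gradient variable.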
python/paddle/v2/framework/framework.py

@@ -306,6 +306,14 @@ class Block(object):
     def idx(self):
         return self.desc.id

+    def var(self, name):
+        if name not in self.vars:
+            raise ValueError("var %s not in this block" % name)
+        return self.vars[name]
+
+    def all_parameters(self):
+        return {v for k, v in self.vars.iteritems() if isinstance(v, Parameter)}
+
     def create_var(self, *args, **kwargs):
         return Variable(self, *args, **kwargs)
@@ -314,7 +322,8 @@ class Block(object):
     def create_parameter(self, *args, **kwargs):
         global_block = self.program.global_block()
-        return Parameter(global_block, *args, **kwargs)
+        param = Parameter(global_block, *args, **kwargs)
+        return param

     def append_op(self, *args, **kwargs):
         op_desc = self.desc.append_op()
@@ -392,10 +401,16 @@ class Program(object):
     def global_block(self):
         return self.blocks[0]

+    def block(self, index):
+        return self.blocks[index]
+
     def current_block(self):
         return self.blocks[self.current_block_idx]

     def append_backward(self, target, no_grad_set):
+        """
+        return map(param_name -> (grad_name, block_index, op_index))
+        """
         assert isinstance(target, Variable)
         param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set)
         self.sync_with_cpp()
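A small sketch of how the new Block/Program helpers compose (Python 2, matching the iteritems() call above; names are illustrative and assume the block already holds a Parameter called "fc.w"):

    params = block.all_parameters()   # set of Parameter objects in this block
    w = block.var("fc.w")             # raises ValueError for unknown names
    blk = prog.block(0)               # Program.block(index), new in this change
    param_to_grad = prog.append_backward(loss, set())
    # param_to_grad maps param_name -> (grad_name, block_index, op_index)

These helpers are exactly what optimizer.py (added below) uses to turn the name-level map returned by append_backward back into Variable objects.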
python/paddle/v2/framework/optimizer.py  (new file, mode 100644)

import paddle.v2.framework.framework as framework

__all__ = ['SGDOptimizer']


class Optimizer(object):
    """Optimizer Base class.

    Define the common interface of an optimizer.
    User should not use this class directly, but need to use
    one of it's implementation.
    """

    def __init__(self):
        pass

    def _append_optimize_op(self, block, param_and_grad):
        """ append optimize operator to block and return all the added optimize_op
        """
        raise NotImplementedError()

    def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None):
        """
        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
        for parameters in parameter_list

        Args:
          loss: an variable generated by cost function.
          no_grad_set: variable that should not create gradient
          parameter_list: parameters that need to compute gradient and update to optimize the lost.

        Returns:
          list of (parameters, gradients) pair.
        """
        assert isinstance(loss, framework.Variable)
        param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
                                                            set())
        if parameter_list is not None:
            parameters = parameter_list
        else:
            params = loss.block.program.global_block().all_parameters()
            parameters = [param.name for param in params]
        params_and_grads = []
        for param in parameters:
            if param not in param_grad_map:
                raise Exception("param %s is not in map" % param)
            grad_info = param_grad_map[param]
            grad_block = loss.block.program.block(grad_info[1])
            if not grad_block.has_var(grad_info[0]):
                raise Exception("grad block[%d] did not have grad var %s" %
                                grad_info[1], grad_info[0])
            param_var = loss.block.var(param)
            grad_var = grad_block.var(grad_info[0])
            if loss.block.has_var(grad_info[0]):
                params_and_grads.append((param_var, grad_var))
            else:
                params_and_grads.append((param_var, None))
        return params_and_grads

    def create_optimization_pass(self, parameters_and_grads, loss):
        """Add optimization operators to update gradients to variables.

        Args:
          loss: the target that this optimization is for.
          parameters_and_grads: a list of (variable, gradient) pair to update.

        Returns:
          optmization_op_list: a list of optimization operator that will update
          parameter using gradient.
        """
        optimize_ops = []
        for param_and_grad in parameters_and_grads:
            if param_and_grad[1] is not None:
                optimize_op = self._append_optimize_op(loss.block,
                                                       param_and_grad)
                optimize_ops.append(optimize_op)
        return optimize_ops

    def minimize(self, loss, parameter_list=None, no_grad_set=None):
        """Add operations to minimize `loss` by updating `parameter_list`.

        This method combines interface `create_backward_pass()` and
        `create_optimization_pass()` into one.
        """
        params_grads = self.create_backward_pass(loss, parameter_list,
                                                 no_grad_set or set())
        optimize_ops = self.create_optimization_pass(params_grads, loss)
        return optimize_ops


class SGDOptimizer(Optimizer):
    """ Simple SGD optimizer without any state.
    """

    def __init__(self, learning_rate):
        assert learning_rate is not None
        super(Optimizer, self).__init__()
        self.type = "sgd"
        self._learning_rate = learning_rate

    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)
        lr_shape = [1]
        # create a var for learning_rate
        lr = block.create_var(dtype="float32", shape=lr_shape, lod_level=0)

        # create an op to init the learning_rate
        init_op = block.append_op(
            type="fill_constant",
            outputs={"Out": lr},
            attrs={"shape": lr_shape,
                   "value": self._learning_rate})

        # create the optimize op
        sgd_op = block.append_op(
            type=self.type,
            inputs={
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "LearningRate": lr
            },
            outputs={"ParamOut": param_and_grad[0]},
            attrs={"shape": [1],
                   "value": self._learning_rate})

        return sgd_op
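A minimal usage sketch of the new API, mirroring the test that follows (it assumes forward ops have already been appended so that `loss` is a Variable in the program's global block; variable names are illustrative):

    import paddle.v2.framework.framework as framework
    import paddle.v2.framework.optimizer as optimizer

    block = framework.g_program.global_block()
    # ... append forward ops so that `loss` is produced in this block ...
    sgd = optimizer.SGDOptimizer(learning_rate=0.01)
    opts = sgd.minimize(loss)  # appends backward ops, then one "sgd" op per (param, grad) pair

Note that minimize() only appends operator and variable descriptions to the program; actually executing the resulting program is outside the scope of this change.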
python/paddle/v2/framework/tests/test_optimizer.py  (new file, mode 100644)

import unittest

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer


class TestOptimizer(unittest.TestCase):
    def test_sgd_optimizer(self):
        program = framework.g_program
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        mul_op = block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
        opts = sgd_optimizer.minimize(mul_out)
        self.assertEqual(len(opts), 1)
        sgd_op = opts[0]
        self.assertEqual(sgd_op.type, "sgd")


if __name__ == '__main__':
    unittest.main()
python/paddle/v2/framework/tests/test_program.py

@@ -34,49 +34,11 @@ class TestProgram(unittest.TestCase):
         self.assertEqual(1, b.idx)
         self.assertEqual(0, b.parent_idx)

-    def test_desc_append_backward(self):
-        prog = core.ProgramDesc.__create_program_desc__()
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
-        self.assertIsNotNone(block)
-
-        mul_op_desc = block.append_op()
-        mul_op_desc.set_type("mul")
-        mul_op_desc.set_input("X", ["x1"])
-        mul_op_desc.set_input("Y", ["y1"])
-        mul_op_desc.set_output("Out", ["out1"])
-
-        sum_op_desc = block.append_op()
-        sum_op_desc.set_type("elementwise_add")
-        sum_op_desc.set_input("X", ["out1"])
-        sum_op_desc.set_input("Y", ["b1"])
-        sum_op_desc.set_output("Out", ["out2"])
-
-        target = block.var("out2")
-
-        expect_ops = [
-            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
-            "mul_grad"
-        ]
-
-        def grad_name(name):
-            return name + "@GRAD"
-
-        actual_ops = []
-        param_to_grad = prog.append_backward(target, set())
-        for var_name in ("x1", "y1", "out1", "b1"):
-            self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
-            self.assertEqual(param_to_grad[var_name][1], 0)
-
-        for op in block.all_ops():
-            actual_ops.append(op.type())
-        self.assertEqual(actual_ops, expect_ops)
-
     def test_append_backward(self):
         prog = Program.instance()
         block = prog.global_block()
-        mul_x = block.create_parameter(
+        mul_x = block.create_var(
             dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
         mul_y = block.create_var(
             dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
@@ -88,7 +50,35 @@ class TestProgram(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": [mul_out]},
             attrs={"x_num_col_dims": 1})

-        param_to_grad = prog.append_backward(mul_out, set())
+        add_y = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="add.y")
+        add_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="add.out")
+        add_op = block.append_op(
+            type="elementwise_add",
+            inputs={"X": mul_out,
+                    "Y": add_y},
+            outputs={"Out": add_out},
+            attrs={"x_num_col_dims": 1})
+
+        param_to_grad = prog.append_backward(add_out, set())
+
+        def grad_name(name):
+            return name + "@GRAD"
+
+        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"):
+            self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
+            self.assertEqual(param_to_grad[var_name][1], 0)
+
+        expect_ops = [
+            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
+            "mul_grad"
+        ]
+        actual_ops = []
+        for op in block.ops:
+            actual_ops.append(op.type)
+        self.assertEqual(actual_ops, expect_ops)

 if __name__ == '__main__':