Commit fd8c5107
Authored June 06, 2017 by dzhwinter
"format name with google style"
Parent: 5ab958ba
Showing 18 changed files with 142 additions and 155 deletions (+142 -155)
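The commit applies Google C++ style to the optimizer library's names: private data members gain a trailing underscore, public methods move from snake_case to CamelCase (update -> Update, create -> Create, get_learning_rate -> LearningRate), BaseLr is renamed to LrPolicy, and the proto's string-typed selectors become enums. A minimal illustration of the naming pattern (a hypothetical class, not taken from the diff below); the per-file changes follow.

class Tensor;  // stands in for paddle/optimizer/Tensor.h

// Google style: CamelCase method names, trailing underscore on private members.
class ExampleOptimizer {
public:
  // was: void update(const Tensor *gradient);
  void Update(const Tensor *gradient);

private:
  // was: double rho; double epsilon;
  double rho_;
  double epsilon_;
};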
paddle/optimizer/CMakeLists.txt                 +3   -3
paddle/optimizer/Tensor.h                       +1   -1
paddle/optimizer/Tensor_test.cpp                +2   -1
paddle/optimizer/adadelta_optimizer.cc          +15  -14
paddle/optimizer/adadelta_optimizer.h           +12  -16
paddle/optimizer/adagrad_optimizer.cc           +8   -7
paddle/optimizer/adagrad_optimizer.h            +7   -7
paddle/optimizer/adam_optimizer.cc              +9   -8
paddle/optimizer/adam_optimizer.h               +10  -10
paddle/optimizer/lr_policy.h                    +13  -13
paddle/optimizer/optimizer.cc                   +2   -2
paddle/optimizer/optimizer_test.cpp             +0   -11
paddle/optimizer/parameter_optimizer.cc         +12  -13
paddle/optimizer/parameter_optimizer.h          +7   -7
paddle/optimizer/parameter_optimizer_test.cpp   +7   -7
paddle/optimizer/sgd_optimizer.h                +6   -10
paddle/optimizer/sgd_optmizer.cc                +11  -10
proto/OptimizerConfig.proto                     +17  -15
paddle/optimizer/CMakeLists.txt

@@ -9,7 +9,7 @@ set(OPITMIZER_SRCS
     sgd_optmizer.cc
     )
-set(OPITMIZER_Headers
+set(OPITMIZER_HEADERS
     adadelta_optimizer.h
     adagrad_optimizer.h
     adam_optimizer.h
@@ -17,12 +17,12 @@ set(OPITMIZER_Headers
     optimizer.h
     parameter_optimizer.h
     sgd_optimizer.h
-    Tensor.h
+    tensor.h
     )

 add_library(optimizer STATIC ${OPITMIZER_SRCS})
 add_dependencies(optimizer gen_proto_cpp)

-add_simple_unittest(Tensor_test)
+add_simple_unittest(tensor_test)
 add_simple_unittest(parameter_optimizer_test)
 add_dependencies(parameter_optimizer_test optimizer)
paddle/optimizer/Tensor.h

@@ -32,7 +32,7 @@ public:
     return data_[idx];
   }
   // TODO: replace with tensorshape
-  size_t size() const { return this->width_; }
+  size_t size() const { return this->width_ * this->height_; }

 protected:
   size_t height_;
...
paddle/optimizer/Tensor_test.cpp

-#include "Tensor.h"
 #include <iostream>
 #include "gtest/gtest.h"
+#include "tensor.h"

 using namespace paddle;
 using namespace paddle::optimizer;

@@ -13,6 +13,7 @@ TEST(Tensor, indexer) {
   }
   ASSERT_EQ(t[2], 2);
   ASSERT_EQ(t[1], 1);
+  delete ptr;
 }

 int main(int argc, char** argv) {
...
paddle/optimizer/adadelta_optimizer.cc

@@ -6,32 +6,33 @@ namespace paddle {
 namespace optimizer {

 void AdadeltaOptimizer::set_weight(Tensor *p) {
   parameter_ = p;
   size_t size = p->size();
   real *gptr = new real[size];
-  accum_gradient = new Tensor(gptr, size);
+  accum_gradient_ = new Tensor(gptr, size);
   real *dptr = new real[size];
-  accum_delta = new Tensor(dptr, size);
+  accum_delta_ = new Tensor(dptr, size);
   real *dptr_current = new real[size];
-  update_delta = new Tensor(dptr_current, size);
+  update_delta_ = new Tensor(dptr_current, size);
 }

-void AdadeltaOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void AdadeltaOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
-  Tensor &accum_g = *accum_gradient;
-  Tensor &accum_d = *accum_delta;
-  Tensor &update_d = *update_delta;
+  Tensor &accum_g = *accum_gradient_;
+  Tensor &accum_d = *accum_delta_;
+  Tensor &update_d = *update_delta_;
   for (size_t i = 0; i < param.size(); ++i) {
-    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
+    accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];
-    update_d[i] = std::sqrt(accum_d[i] + epsilon) / std::sqrt(accum_g[i] + epsilon) * grad[i];
+    update_d[i] = std::sqrt(accum_d[i] + epsilon_) / std::sqrt(accum_g[i] + epsilon_) * grad[i];
-    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
+    accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];
-    param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
+    param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
   }
 }
 }  // namespace optimizer
...
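For reference, the loop in AdadeltaOptimizer::Update realizes the Adadelta rule with an extra learning-rate and weight-decay factor; a sketch in the code's notation, with g the gradient, rho_ = \rho, epsilon_ = \epsilon, decay_ = \lambda and \eta the rate returned by lr_policy_:

    E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2
    \Delta\theta_t = \frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\, g_t
    E[\Delta\theta^2]_t = \rho\, E[\Delta\theta^2]_{t-1} + (1-\rho)\, \Delta\theta_t^2
    \theta_{t+1} = \theta_t - \eta\, \Delta\theta_t - \eta\,\lambda\, \theta_t

where accum_g, accum_d and update_d hold E[g^2], E[\Delta\theta^2] and \Delta\theta respectively.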
paddle/optimizer/adadelta_optimizer.h

@@ -8,29 +8,25 @@ namespace optimizer {
 class AdadeltaOptimizer : public ParameterOptimizer {
 public:
-  using ParameterOptimizer::parameter_;
-  using ParameterOptimizer::num_sample_passed;
-  using ParameterOptimizer::lr_policy;
-  AdadeltaOptimizer(double rho, double epsilon, double decay, BaseLr *lr)
-      : ParameterOptimizer(lr), rho(rho), epsilon(epsilon), decay(decay) {}
+  AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
+      : ParameterOptimizer(lr), rho_(rho), epsilon_(epsilon), decay_(decay) {}
   ~AdadeltaOptimizer() {
-    if (accum_gradient) delete accum_gradient;
-    if (accum_delta) delete accum_delta;
-    if (update_delta) delete update_delta;
+    if (accum_gradient_) delete accum_gradient_;
+    if (accum_delta_) delete accum_delta_;
+    if (update_delta_) delete update_delta_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;

 private:
-  Tensor *accum_gradient;
-  Tensor *accum_delta;
-  Tensor *update_delta;
+  Tensor *accum_gradient_;
+  Tensor *accum_delta_;
+  Tensor *update_delta_;
-  double rho;
-  double epsilon;
-  double decay;
+  double rho_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
...
paddle/optimizer/adagrad_optimizer.cc

@@ -6,21 +6,22 @@ namespace paddle {
 namespace optimizer {

 void AdagradOptimizer::set_weight(Tensor *p) {
   parameter_ = p;
   size_t size = p->size();
   real *gptr = new real[size];
-  accum_gradient = new Tensor(gptr, size);
+  accum_gradient_ = new Tensor(gptr, size);
 }

-void AdagradOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void AdagradOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   Tensor &param = *parameter_;
+  Tensor &accum_g = *accum_gradient_;
   const Tensor &grad = *gradient;
-  Tensor &accum_g = *accum_gradient;
   for (size_t i = 0; i < param.size(); ++i) {
     accum_g[i] += grad[i] * grad[i];
-    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon) +
-                learning_rate * decay * param[i];
+    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
+                learning_rate * decay_ * param[i];
   }
 }
...
paddle/optimizer/adagrad_optimizer.h

@@ -8,19 +8,19 @@ namespace optimizer {
 class AdagradOptimizer : public ParameterOptimizer {
 public:
-  AdagradOptimizer(double epsilon, double decay, BaseLr *lr)
-      : ParameterOptimizer(lr), epsilon(epsilon), decay(decay) {}
+  AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
+      : ParameterOptimizer(lr), epsilon_(epsilon), decay_(decay) {}
   ~AdagradOptimizer() {
-    if (accum_gradient) delete accum_gradient;
+    if (accum_gradient_) delete accum_gradient_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;

 private:
-  Tensor *accum_gradient;
-  double epsilon;
-  double decay;
+  Tensor *accum_gradient_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
...
paddle/optimizer/adam_optimizer.cc

@@ -5,6 +5,7 @@ namespace paddle {
 namespace optimizer {

 void AdamOptimizer::set_weight(Tensor *p) {
   parameter_ = p;
   size_t size = p->size();
   real *mptr = new real[size];
   momentums_ = new Tensor(mptr, size);
@@ -12,21 +13,21 @@ void AdamOptimizer::set_weight(Tensor *p) {
   velocitys_ = new Tensor(vptr, size);
 }

-void AdamOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
-  double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
-  double coef2 = 1.0 - std::pow(beta_2, num_sample_passed);
+void AdamOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
+  double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
+  double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
   learning_rate *= std::sqrt(coef2) / coef1;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
   Tensor &v = *velocitys_;
   for (size_t i = 0; i < param.size(); ++i) {
-    m[i] = beta_1 * m[i] + (1.0 - beta_1) * grad[i];
-    v[i] = beta_2 * v[i] + (1.0 - beta_2) * grad[i] * grad[i];
+    m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
+    v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
     param[i] -=
-        learning_rate * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
+        learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
   }
 }
 }  // namespace optimizer
...
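For reference, AdamOptimizer::Update is the bias-corrected Adam step plus a decay term; a sketch with beta_1_ = \beta_1, beta_2_ = \beta_2, epsilon_ = \epsilon, decay_ = \lambda and t = num_sample_passed_:

    m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t
    v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2
    \hat{\eta}_t = \eta\, \sqrt{1-\beta_2^t} \,/\, (1-\beta_1^t)
    \theta_{t+1} = \theta_t - \hat{\eta}_t \left( m_t / \sqrt{v_t + \epsilon} + \lambda\, \theta_t \right)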
paddle/optimizer/adam_optimizer.h

@@ -9,27 +9,27 @@ namespace optimizer {
 class AdamOptimizer : public ParameterOptimizer {
 public:
   AdamOptimizer(
-      double beta_1, double beta_2, double epsilon, double decay, BaseLr *lr)
+      double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
       : ParameterOptimizer(lr),
-        beta_1(beta_1),
-        beta_2(beta_2),
-        epsilon(epsilon),
-        decay(decay) {}
+        beta_1_(beta_1),
+        beta_2_(beta_2),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdamOptimizer() {
     if (momentums_) delete momentums_;
     if (velocitys_) delete velocitys_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;

 private:
   Tensor *momentums_;
   Tensor *velocitys_;
-  double beta_1;
-  double beta_2;
-  double epsilon;
-  double decay;
+  double beta_1_;
+  double beta_2_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
...
paddle/optimizer/lr_policy.h

@@ -7,34 +7,34 @@
 namespace paddle {
 namespace optimizer {

-class BaseLr {
+class LrPolicy {
 public:
-  BaseLr(double lr) : learning_rate(lr) {}
-  virtual ~BaseLr() {}
-  virtual double get_learning_rate(const uint64_t num_sample_passed) = 0;
-protected:
-  double learning_rate;
+  virtual ~LrPolicy() {}
+  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
 };

 // constant learning rate policy
-class ConstLr final : public BaseLr {
+class ConstLr final : public LrPolicy {
 public:
-  ConstLr(double lr) : BaseLr(lr){};
-  double get_learning_rate(const uint64_t num_sample_passed) {
+  ConstLr(double lr) : learning_rate(lr){};
+  double LearningRate(const uint64_t num_sample_passed) {
     return learning_rate;
   }
+protected:
+  double learning_rate;
 };

-class LinearLr final : public BaseLr {
+class LinearLr final : public LrPolicy {
 public:
   LinearLr(double lr, double lr_decay_a, double lr_decay_b)
-      : BaseLr(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
-  double get_learning_rate(const uint64_t num_sample_passed) {
+      : learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
+  double LearningRate(const uint64_t num_sample_passed) {
     return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
   }

 private:
   double learning_rate;
   double lr_decay_a;
   double lr_decay_b;
 };
...
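With this change each schedule owns its own learning-rate state and only has to implement LrPolicy::LearningRate. A hypothetical additional policy written against the new interface, as a sketch (ExpDecayLr is not part of the commit; it assumes <cmath> and lr_policy.h):

#include <cmath>

#include "lr_policy.h"

namespace paddle {
namespace optimizer {

// Hypothetical schedule, for illustration only: decay the base rate
// geometrically with the number of samples seen.
class ExpDecayLr final : public LrPolicy {
public:
  ExpDecayLr(double lr, double gamma) : learning_rate(lr), gamma(gamma) {}
  double LearningRate(const uint64_t num_sample_passed) {
    return learning_rate * std::pow(gamma, static_cast<double>(num_sample_passed));
  }

private:
  double learning_rate;
  double gamma;
};

}  // namespace optimizer
}  // namespace paddle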
paddle/optimizer/optimizer.cc

@@ -37,7 +37,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           int config_proto_len) {
   paddle_optimizer* optimizer = new paddle_optimizer;
   std::string config(config_proto, config_proto + config_proto_len);
-  optimizer->impl = ParameterOptimizer::create(config);
+  optimizer->impl = ParameterOptimizer::Create(config);
   return optimizer;
 }
@@ -53,7 +53,7 @@ int paddle_update_parameter(paddle_optimizer* o,
   // TOOD(zhihong): datatype not work. need to add the runtime datatype
   auto grad_type = reinterpret_cast<const real*>(grad_buffer);
   Tensor* gradient = new Tensor(const_cast<real*>(grad_type), num_bytes);
-  o->impl->update(gradient);
+  o->impl->Update(gradient);
   return PADDLE_SUCCESS;
 }
...
paddle/optimizer/optimizer_test.cpp (deleted, 100644 → 0)

-#include "optimizer.h"
-#include "gtest/gtest.h"
-
-template <class T>
-class Opitmizer_C_Test : public testing::Test {
-private:
-  Tensor<T> parameter;
-  Tensor<T> gradient;
-};
-
-void applyGradientDescent_TEST() {}
paddle/optimizer/parameter_optimizer.cc

@@ -10,41 +10,40 @@
 namespace paddle {
 namespace optimizer {

-ParameterOptimizer *ParameterOptimizer::create(
-    const ::std::string &config_proto) {
+ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto) {
   paddle::OptimizerConfig config;
   CHECK(config.ParseFromString(config_proto) == 0)
-      << "error : optimizer config";
+      << "failed parse optimizer config";
-  auto select_lr_policy = [=](const OptimizerConfig &config) -> BaseLr * {
-    std::string s(config.lr_policy());
-    if (s == "ConstLr") return new ConstLr(config.const_lr().learning_rate());
-    if (s == "LinearLr")
+  auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
+    if (config.lr_policy() == OptimizerConfig::ConstLr)
+      return new ConstLr(config.const_lr().learning_rate());
+    if (config.lr_policy() == OptimizerConfig::LinearLr)
       return new LinearLr(config.linear_lr().learning_rate(),
                           config.linear_lr().lr_decay_a(),
                           config.linear_lr().lr_decay_b());
     // default
     return nullptr;
   };
-  BaseLr *lr = select_lr_policy(config);
+  LrPolicy *lr = select_lr_policy(config);
   auto select_optimizer = [=](const OptimizerConfig &config) -> ParameterOptimizer * {
-    std::string s(config.optimizer_name());
-    if (s == "SGD") {
+    if (config.optimizer() == OptimizerConfig::SGD) {
       return new SGDOptimizer(config.sgd().momentum(),
                               config.sgd().decay(),
                               config.sgd().nesterov(),
                               lr);
     }
-    if (s == "Adadelta") {
+    if (config.optimizer() == OptimizerConfig::Adadelta) {
       return new AdagradOptimizer(
           config.adagrad().epsilon(), config.adagrad().decay(), lr);
     }
-    if (s == "Adagrad") {
+    if (config.optimizer() == OptimizerConfig::Adagrad) {
       return new AdagradOptimizer(
           config.adagrad().epsilon(), config.adagrad().decay(), lr);
     }
-    if (s == "Adam") {
+    if (config.optimizer() == OptimizerConfig::Adam) {
       return new AdadeltaOptimizer(config.adadelta().rho(),
                                    config.adadelta().epsilon(),
                                    config.adadelta().decay(),
...
paddle/optimizer/parameter_optimizer.h

@@ -5,8 +5,8 @@
 #include <functional>
 #include <string>
 #include "OptimizerConfig.pb.h"
-#include "Tensor.h"
 #include "lr_policy.h"
+#include "tensor.h"

 namespace paddle {
 namespace optimizer {
@@ -17,21 +17,21 @@ public:
   * @brief update hook for algorithm need to traverse parameter more than
   * once.
   */
-  ParameterOptimizer(BaseLr *lr) : lr_policy(lr), num_sample_passed(0) {}
+  ParameterOptimizer(LrPolicy *lr) : lr_policy_(lr), num_sample_passed_(0) {}
   virtual ~ParameterOptimizer() { delete parameter_; };
-  static ParameterOptimizer *create(const ::std::string &config_proto);
-  virtual void update(const Tensor *gradient) = 0;
+  static ParameterOptimizer *Create(const std::string &config_proto);
+  virtual void Update(const Tensor *gradient) = 0;
   virtual real *get_weight() const;
   virtual void set_weight(Tensor *parameter);

-public:
+protected:
   OptimizerConfig config_;
   Tensor *parameter_;
   // learning rate policy
-  BaseLr *lr_policy;
-  uint64_t num_sample_passed;
+  LrPolicy *lr_policy_;
+  uint64_t num_sample_passed_;
 };
 }  // namespace optimizer
...
paddle/optimizer/parameter_optimizer_test.cpp

@@ -42,28 +42,28 @@ public:
   virtual void TearDown() {}

   void create_sgd() {
-    config.set_optimizer_name("SGD");
+    config.set_optimizer(OptimizerConfig::SGD);
     config.mutable_sgd()->set_momentum(0.0);
     config.mutable_sgd()->set_decay(0.0);
     config.mutable_sgd()->set_nesterov(false);
-    config.set_lr_policy("ConstLr");
+    config.set_lr_policy(OptimizerConfig::ConstLr);
     config.mutable_const_lr()->set_learning_rate(0.1);
     ParameterOptimizer *opt =
-        ParameterOptimizer::create(config.SerializeAsString());
+        ParameterOptimizer::Create(config.SerializeAsString());
     opts.push_back(opt);
   }

   void create_adam() {
-    config.set_optimizer_name("Adam");
+    config.set_optimizer(OptimizerConfig::Adam);
     config.mutable_adam()->set_beta_1(0.9);
     config.mutable_adam()->set_beta_2(0.1);
     config.mutable_adam()->set_epsilon(1e-3);
     config.mutable_adam()->set_decay(0.0);
-    config.set_lr_policy("ConstLr");
+    config.set_lr_policy(OptimizerConfig::ConstLr);
     config.mutable_const_lr()->set_learning_rate(0.1);
     ParameterOptimizer *opt =
-        ParameterOptimizer::create(config.SerializeAsString());
+        ParameterOptimizer::Create(config.SerializeAsString());
     opts.push_back(opt);
   }

   void test_set_weight() {
@@ -88,7 +88,7 @@ public:
   void test_update() {
     Tensor *g = fix_n_Tensor(size);
     for (size_t i = 0; i < opts.size(); ++i) {
-      opts[i]->update(g);
+      opts[i]->Update(g);
     }
   }
...
paddle/optimizer/sgd_optimizer.h

@@ -8,23 +8,19 @@ namespace optimizer {
 class SGDOptimizer : public ParameterOptimizer {
 public:
-  using ParameterOptimizer::parameter_;
-  using ParameterOptimizer::num_sample_passed;
-  using ParameterOptimizer::lr_policy;
-  SGDOptimizer(double m, double d, bool n, BaseLr *lr)
-      : ParameterOptimizer(lr), momentum(m), decay(d), nesterov(n) {}
+  SGDOptimizer(double m, double d, bool n, LrPolicy *lr)
+      : ParameterOptimizer(lr), momentum_(m), decay_(d), nesterov_(n) {}
   virtual ~SGDOptimizer() { delete momentums_; }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;

 private:
   Tensor *momentums_;
-  double momentum;
-  double decay;
-  bool nesterov;
+  double momentum_;
+  double decay_;
+  bool nesterov_;
 };
 }  // namespace optimizer
...
paddle/optimizer/sgd_optmizer.cc

@@ -5,31 +5,32 @@ namespace optimizer {
 void SGDOptimizer::set_weight(Tensor *p) {
   // ParameterOptimizer::set_weight(p);
   parameter_ = p;
   size_t size = p->size();
   // TODO: fix it with align aware allocator bind to Tensor
-  if (momentum != 0.0) {
+  if (momentum_ != 0.0) {
     real *ptr = new real[size];
     momentums_ = new Tensor(ptr, size);
   }
 }

-void SGDOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void SGDOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   real velocity = 0.0;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
   for (size_t i = 0; i < param.size(); ++i) {
-    if (momentum == 0.0) {
-      velocity = -learning_rate * grad[i] - learning_rate * decay * param[i];
+    if (momentum_ == 0.0) {
+      velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i];
     } else {
-      m[i] = momentum * m[i] - learning_rate * grad[i] - learning_rate * decay * param[i];
+      m[i] = momentum_ * m[i] - learning_rate * grad[i] - learning_rate * decay_ * param[i];
       velocity = m[i];
     }
-    if (nesterov) {
-      param[i] += momentum * velocity - learning_rate * grad[i];
+    if (nesterov_) {
+      param[i] += momentum_ * velocity - learning_rate * grad[i];
     } else {
       param[i] += velocity;
     }
...
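For reference, SGDOptimizer::Update applies momentum SGD with weight decay; with momentum_ = \mu, decay_ = \lambda and learning rate \eta, the plain branch computes

    v_t = \mu\, v_{t-1} - \eta\, g_t - \eta\,\lambda\, \theta_t, \qquad \theta_{t+1} = \theta_t + v_t

while the Nesterov branch (nesterov_ set) applies \theta_{t+1} = \theta_t + \mu\, v_t - \eta\, g_t instead.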
proto/OptimizerConfig.proto

@@ -54,38 +54,40 @@ message AdamConfig {
 message ConstLr {
   // learninRate Policy
-  required double learning_rate = 40 [default = 1.0];
+  required double learning_rate = 1 [default = 1.0];
 }

 message LinearLr {
   // learninRate Policy
-  required double learning_rate = 40 [default = 1.0];
-  optional double lr_decay_a = 25;
-  optional double lr_decay_b = 26;
+  required double learning_rate = 1 [default = 1.0];
+  optional double lr_decay_a = 2;
+  optional double lr_decay_b = 3;
 }

 message OptimizerConfig {
-  // common config of optimizer
-  // algorithm config, type : string
-  // SGD = 1;
-  // Adadelta = 2;
-  // Adagrad = 3;
-  // Adam = 4;
-  required string optimizer_name = 1;
+  enum Optimizer {
+    SGD = 1;
+    Adadelta = 2;
+    Adagrad = 3;
+    Adam = 4;
+  }
+  required Optimizer optimizer = 1;
   optional SGDConfig sgd = 3;
   optional AdadeltaConfig adadelta = 4;
   optional AdagradConfig adagrad = 5;
   optional AdamConfig adam = 6;
   // learning rate runtime policy config
-  // lr_policy , type : string
-  // ConstLr = 0;
-  // LinearLr = 1;
-  required string lr_policy = 11;
+  enum LrPolicy {
+    ConstLr = 0;
+    LinearLr = 1;
+  }
+  required LrPolicy lr_policy = 11;
   optional ConstLr const_lr = 12;
-  optional LinearLr linear_lr = 15;
-  optional uint64 num_sample_passed = 13 [default = 0];
+  optional LinearLr linear_lr = 13;
+  // common config of optimizer
   optional double clipnorm = 101;
...
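To show how a caller now selects an algorithm and a learning-rate policy through the enum fields, a hypothetical usage sketch mirroring parameter_optimizer_test.cpp above (it assumes the generated OptimizerConfig.pb.h and the optimizer library are available):

#include "OptimizerConfig.pb.h"
#include "parameter_optimizer.h"

// Hypothetical helper, for illustration only: build an SGD optimizer with a
// constant learning rate from a serialized OptimizerConfig, the same path the
// C API in optimizer.cc takes via ParameterOptimizer::Create.
paddle::optimizer::ParameterOptimizer *MakeSgdOptimizer() {
  paddle::OptimizerConfig config;
  config.set_optimizer(paddle::OptimizerConfig::SGD);
  config.mutable_sgd()->set_momentum(0.9);
  config.mutable_sgd()->set_decay(1e-4);
  config.mutable_sgd()->set_nesterov(false);
  config.set_lr_policy(paddle::OptimizerConfig::ConstLr);
  config.mutable_const_lr()->set_learning_rate(0.1);
  return paddle::optimizer::ParameterOptimizer::Create(config.SerializeAsString());
}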