Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
fd8c5107
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fd8c5107
编写于
6月 06, 2017
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"format name with google style"
上级
5ab958ba
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
142 addition
and
155 deletion
+142
-155
paddle/optimizer/CMakeLists.txt
paddle/optimizer/CMakeLists.txt
+3
-3
paddle/optimizer/Tensor.h
paddle/optimizer/Tensor.h
+1
-1
paddle/optimizer/Tensor_test.cpp
paddle/optimizer/Tensor_test.cpp
+2
-1
paddle/optimizer/adadelta_optimizer.cc
paddle/optimizer/adadelta_optimizer.cc
+15
-14
paddle/optimizer/adadelta_optimizer.h
paddle/optimizer/adadelta_optimizer.h
+12
-16
paddle/optimizer/adagrad_optimizer.cc
paddle/optimizer/adagrad_optimizer.cc
+8
-7
paddle/optimizer/adagrad_optimizer.h
paddle/optimizer/adagrad_optimizer.h
+7
-7
paddle/optimizer/adam_optimizer.cc
paddle/optimizer/adam_optimizer.cc
+9
-8
paddle/optimizer/adam_optimizer.h
paddle/optimizer/adam_optimizer.h
+10
-10
paddle/optimizer/lr_policy.h
paddle/optimizer/lr_policy.h
+13
-13
paddle/optimizer/optimizer.cc
paddle/optimizer/optimizer.cc
+2
-2
paddle/optimizer/optimizer_test.cpp
paddle/optimizer/optimizer_test.cpp
+0
-11
paddle/optimizer/parameter_optimizer.cc
paddle/optimizer/parameter_optimizer.cc
+12
-13
paddle/optimizer/parameter_optimizer.h
paddle/optimizer/parameter_optimizer.h
+7
-7
paddle/optimizer/parameter_optimizer_test.cpp
paddle/optimizer/parameter_optimizer_test.cpp
+7
-7
paddle/optimizer/sgd_optimizer.h
paddle/optimizer/sgd_optimizer.h
+6
-10
paddle/optimizer/sgd_optmizer.cc
paddle/optimizer/sgd_optmizer.cc
+11
-10
proto/OptimizerConfig.proto
proto/OptimizerConfig.proto
+17
-15
未找到文件。
paddle/optimizer/CMakeLists.txt
浏览文件 @
fd8c5107
...
...
@@ -9,7 +9,7 @@ set(OPITMIZER_SRCS
sgd_optmizer.cc
)
set
(
OPITMIZER_H
eaders
set
(
OPITMIZER_H
EADERS
adadelta_optimizer.h
adagrad_optimizer.h
adam_optimizer.h
...
...
@@ -17,12 +17,12 @@ set(OPITMIZER_Headers
optimizer.h
parameter_optimizer.h
sgd_optimizer.h
T
ensor.h
t
ensor.h
)
add_library
(
optimizer STATIC
${
OPITMIZER_SRCS
}
)
add_dependencies
(
optimizer gen_proto_cpp
)
add_simple_unittest
(
T
ensor_test
)
add_simple_unittest
(
t
ensor_test
)
add_simple_unittest
(
parameter_optimizer_test
)
add_dependencies
(
parameter_optimizer_test optimizer
)
paddle/optimizer/Tensor.h
浏览文件 @
fd8c5107
...
...
@@ -32,7 +32,7 @@ public:
return
data_
[
idx
];
}
// TODO: replace with tensorshape
size_t
size
()
const
{
return
this
->
width_
;
}
size_t
size
()
const
{
return
this
->
width_
*
this
->
height_
;
}
protected:
size_t
height_
;
...
...
paddle/optimizer/Tensor_test.cpp
浏览文件 @
fd8c5107
#include "Tensor.h"
#include <iostream>
#include "gtest/gtest.h"
#include "tensor.h"
using
namespace
paddle
;
using
namespace
paddle
::
optimizer
;
...
...
@@ -13,6 +13,7 @@ TEST(Tensor, indexer) {
}
ASSERT_EQ
(
t
[
2
],
2
);
ASSERT_EQ
(
t
[
1
],
1
);
delete
ptr
;
}
int
main
(
int
argc
,
char
**
argv
)
{
...
...
paddle/optimizer/adadelta_optimizer.cc
浏览文件 @
fd8c5107
...
...
@@ -6,32 +6,33 @@ namespace paddle {
namespace
optimizer
{
void
AdadeltaOptimizer
::
set_weight
(
Tensor
*
p
)
{
parameter_
=
p
;
size_t
size
=
p
->
size
();
real
*
gptr
=
new
real
[
size
];
accum_gradient
=
new
Tensor
(
gptr
,
size
);
accum_gradient
_
=
new
Tensor
(
gptr
,
size
);
real
*
dptr
=
new
real
[
size
];
accum_delta
=
new
Tensor
(
dptr
,
size
);
accum_delta
_
=
new
Tensor
(
dptr
,
size
);
real
*
dptr_current
=
new
real
[
size
];
update_delta
=
new
Tensor
(
dptr_current
,
size
);
update_delta
_
=
new
Tensor
(
dptr_current
,
size
);
}
void
AdadeltaOptimizer
::
u
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
void
AdadeltaOptimizer
::
U
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
_
+=
1
;
double
learning_rate
=
lr_policy
_
->
LearningRate
(
num_sample_passed_
);
Tensor
&
param
=
*
parameter_
;
const
Tensor
&
grad
=
*
gradient
;
Tensor
&
accum_g
=
*
accum_gradient
;
Tensor
&
accum_d
=
*
accum_delta
;
Tensor
&
update_d
=
*
update_delta
;
Tensor
&
accum_g
=
*
accum_gradient
_
;
Tensor
&
accum_d
=
*
accum_delta
_
;
Tensor
&
update_d
=
*
update_delta
_
;
for
(
size_t
i
=
0
;
i
<
param
.
size
();
++
i
)
{
accum_g
[
i
]
=
rho
*
accum_g
[
i
]
+
(
1.0
-
rho
)
*
grad
[
i
]
*
grad
[
i
];
accum_g
[
i
]
=
rho
_
*
accum_g
[
i
]
+
(
1.0
-
rho_
)
*
grad
[
i
]
*
grad
[
i
];
update_d
[
i
]
=
std
::
sqrt
(
accum_d
[
i
]
+
epsilon
)
/
std
::
sqrt
(
accum_g
[
i
]
+
epsilon
)
*
grad
[
i
];
update_d
[
i
]
=
std
::
sqrt
(
accum_d
[
i
]
+
epsilon
_
)
/
std
::
sqrt
(
accum_g
[
i
]
+
epsilon
_
)
*
grad
[
i
];
accum_d
[
i
]
=
rho
*
accum_d
[
i
]
+
(
1.0
-
rho
)
*
update_d
[
i
]
*
update_d
[
i
];
accum_d
[
i
]
=
rho
_
*
accum_d
[
i
]
+
(
1.0
-
rho_
)
*
update_d
[
i
]
*
update_d
[
i
];
param
[
i
]
-=
learning_rate
*
update_d
[
i
]
+
learning_rate
*
decay
*
param
[
i
];
param
[
i
]
-=
learning_rate
*
update_d
[
i
]
+
learning_rate
*
decay
_
*
param
[
i
];
}
}
}
// namespace optimizer
...
...
paddle/optimizer/adadelta_optimizer.h
浏览文件 @
fd8c5107
...
...
@@ -8,29 +8,25 @@ namespace optimizer {
class
AdadeltaOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
::
parameter_
;
using
ParameterOptimizer
::
num_sample_passed
;
using
ParameterOptimizer
::
lr_policy
;
AdadeltaOptimizer
(
double
rho
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
(
lr
),
rho
(
rho
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
AdadeltaOptimizer
(
double
rho
,
double
epsilon
,
double
decay
,
LrPolicy
*
lr
)
:
ParameterOptimizer
(
lr
),
rho_
(
rho
),
epsilon_
(
epsilon
),
decay_
(
decay
)
{}
~
AdadeltaOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_delta
)
delete
accum_delta
;
if
(
update_delta
)
delete
update_delta
;
if
(
accum_gradient
_
)
delete
accum_gradient_
;
if
(
accum_delta
_
)
delete
accum_delta_
;
if
(
update_delta
_
)
delete
update_delta_
;
}
void
u
pdate
(
const
Tensor
*
gradient
);
void
U
pdate
(
const
Tensor
*
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
*
accum_gradient
;
Tensor
*
accum_delta
;
Tensor
*
update_delta
;
Tensor
*
accum_gradient
_
;
Tensor
*
accum_delta
_
;
Tensor
*
update_delta
_
;
double
rho
;
double
epsilon
;
double
decay
;
double
rho
_
;
double
epsilon
_
;
double
decay
_
;
};
}
// namespace optimizer
...
...
paddle/optimizer/adagrad_optimizer.cc
浏览文件 @
fd8c5107
...
...
@@ -6,21 +6,22 @@ namespace paddle {
namespace
optimizer
{
void
AdagradOptimizer
::
set_weight
(
Tensor
*
p
)
{
parameter_
=
p
;
size_t
size
=
p
->
size
();
real
*
gptr
=
new
real
[
size
];
accum_gradient
=
new
Tensor
(
gptr
,
size
);
accum_gradient
_
=
new
Tensor
(
gptr
,
size
);
}
void
AdagradOptimizer
::
u
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
void
AdagradOptimizer
::
U
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
_
+=
1
;
double
learning_rate
=
lr_policy
_
->
LearningRate
(
num_sample_passed_
);
Tensor
&
param
=
*
parameter_
;
Tensor
&
accum_g
=
*
accum_gradient_
;
const
Tensor
&
grad
=
*
gradient
;
Tensor
&
accum_g
=
*
accum_gradient
;
for
(
size_t
i
=
0
;
i
<
param
.
size
();
++
i
)
{
accum_g
[
i
]
+=
grad
[
i
]
*
grad
[
i
];
param
[
i
]
+=
learning_rate
*
grad
[
i
]
/
std
::
sqrt
(
accum_g
[
i
]
+
epsilon
)
+
learning_rate
*
decay
*
param
[
i
];
param
[
i
]
+=
learning_rate
*
grad
[
i
]
/
std
::
sqrt
(
accum_g
[
i
]
+
epsilon
_
)
+
learning_rate
*
decay
_
*
param
[
i
];
}
}
...
...
paddle/optimizer/adagrad_optimizer.h
浏览文件 @
fd8c5107
...
...
@@ -8,19 +8,19 @@ namespace optimizer {
class
AdagradOptimizer
:
public
ParameterOptimizer
{
public:
AdagradOptimizer
(
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
(
lr
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
AdagradOptimizer
(
double
epsilon
,
double
decay
,
LrPolicy
*
lr
)
:
ParameterOptimizer
(
lr
),
epsilon
_
(
epsilon
),
decay_
(
decay
)
{}
~
AdagradOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_gradient
_
)
delete
accum_gradient_
;
}
void
u
pdate
(
const
Tensor
*
gradient
);
void
U
pdate
(
const
Tensor
*
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
*
accum_gradient
;
double
epsilon
;
double
decay
;
Tensor
*
accum_gradient
_
;
double
epsilon
_
;
double
decay
_
;
};
}
// namespace optimizer
...
...
paddle/optimizer/adam_optimizer.cc
浏览文件 @
fd8c5107
...
...
@@ -5,6 +5,7 @@ namespace paddle {
namespace
optimizer
{
void
AdamOptimizer
::
set_weight
(
Tensor
*
p
)
{
parameter_
=
p
;
size_t
size
=
p
->
size
();
real
*
mptr
=
new
real
[
size
];
momentums_
=
new
Tensor
(
mptr
,
size
);
...
...
@@ -12,21 +13,21 @@ void AdamOptimizer::set_weight(Tensor *p) {
velocitys_
=
new
Tensor
(
vptr
,
size
);
}
void
AdamOptimizer
::
u
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
double
coef1
=
1.0
-
std
::
pow
(
beta_1
,
num_sample_passed
);
double
coef2
=
1.0
-
std
::
pow
(
beta_2
,
num_sample_passed
);
void
AdamOptimizer
::
U
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
_
+=
1
;
double
learning_rate
=
lr_policy
_
->
LearningRate
(
num_sample_passed_
);
double
coef1
=
1.0
-
std
::
pow
(
beta_1
_
,
num_sample_passed_
);
double
coef2
=
1.0
-
std
::
pow
(
beta_2
_
,
num_sample_passed_
);
learning_rate
*=
std
::
sqrt
(
coef2
)
/
coef1
;
Tensor
&
param
=
*
parameter_
;
const
Tensor
&
grad
=
*
gradient
;
Tensor
&
m
=
*
momentums_
;
Tensor
&
v
=
*
velocitys_
;
for
(
size_t
i
=
0
;
i
<
param
.
size
();
++
i
)
{
m
[
i
]
=
beta_1
*
m
[
i
]
+
(
1.0
-
beta_1
)
*
grad
[
i
];
v
[
i
]
=
beta_2
*
v
[
i
]
+
(
1.0
-
beta_2
)
*
grad
[
i
]
*
grad
[
i
];
m
[
i
]
=
beta_1
_
*
m
[
i
]
+
(
1.0
-
beta_1_
)
*
grad
[
i
];
v
[
i
]
=
beta_2
_
*
v
[
i
]
+
(
1.0
-
beta_2_
)
*
grad
[
i
]
*
grad
[
i
];
param
[
i
]
-=
learning_rate
*
(
m
[
i
]
/
std
::
sqrt
(
v
[
i
]
+
epsilon
)
+
decay
*
param
[
i
]);
learning_rate
*
(
m
[
i
]
/
std
::
sqrt
(
v
[
i
]
+
epsilon
_
)
+
decay_
*
param
[
i
]);
}
}
}
// namespace optimizer
...
...
paddle/optimizer/adam_optimizer.h
浏览文件 @
fd8c5107
...
...
@@ -9,27 +9,27 @@ namespace optimizer {
class
AdamOptimizer
:
public
ParameterOptimizer
{
public:
AdamOptimizer
(
double
beta_1
,
double
beta_2
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
double
beta_1
,
double
beta_2
,
double
epsilon
,
double
decay
,
LrPolicy
*
lr
)
:
ParameterOptimizer
(
lr
),
beta_1
(
beta_1
),
beta_2
(
beta_2
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
beta_1
_
(
beta_1
),
beta_2
_
(
beta_2
),
epsilon
_
(
epsilon
),
decay
_
(
decay
)
{}
~
AdamOptimizer
()
{
if
(
momentums_
)
delete
momentums_
;
if
(
velocitys_
)
delete
velocitys_
;
}
void
u
pdate
(
const
Tensor
*
gradient
);
void
U
pdate
(
const
Tensor
*
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
*
momentums_
;
Tensor
*
velocitys_
;
double
beta_1
;
double
beta_2
;
double
epsilon
;
double
decay
;
double
beta_1
_
;
double
beta_2
_
;
double
epsilon
_
;
double
decay
_
;
};
}
// namespace optimizer
...
...
paddle/optimizer/lr_policy.h
浏览文件 @
fd8c5107
...
...
@@ -7,34 +7,34 @@
namespace
paddle
{
namespace
optimizer
{
class
BaseLr
{
class
LrPolicy
{
public:
BaseLr
(
double
lr
)
:
learning_rate
(
lr
)
{}
virtual
~
BaseLr
()
{}
virtual
double
get_learning_rate
(
const
uint64_t
num_sample_passed
)
=
0
;
protected:
double
learning_rate
;
virtual
~
LrPolicy
()
{}
virtual
double
LearningRate
(
const
uint64_t
num_sample_passed
)
=
0
;
};
// constant learning rate policy
class
ConstLr
final
:
public
BaseLr
{
class
ConstLr
final
:
public
LrPolicy
{
public:
ConstLr
(
double
lr
)
:
BaseLr
(
lr
){};
double
get_learning_r
ate
(
const
uint64_t
num_sample_passed
)
{
ConstLr
(
double
lr
)
:
learning_rate
(
lr
){};
double
LearningR
ate
(
const
uint64_t
num_sample_passed
)
{
return
learning_rate
;
}
protected:
double
learning_rate
;
};
class
LinearLr
final
:
public
BaseLr
{
class
LinearLr
final
:
public
LrPolicy
{
public:
LinearLr
(
double
lr
,
double
lr_decay_a
,
double
lr_decay_b
)
:
BaseLr
(
lr
),
lr_decay_a
(
lr_decay_a
),
lr_decay_b
(
lr_decay_b
)
{}
double
get_learning_r
ate
(
const
uint64_t
num_sample_passed
)
{
:
learning_rate
(
lr
),
lr_decay_a
(
lr_decay_a
),
lr_decay_b
(
lr_decay_b
)
{}
double
LearningR
ate
(
const
uint64_t
num_sample_passed
)
{
return
std
::
max
(
learning_rate
-
lr_decay_a
*
num_sample_passed
,
lr_decay_b
);
}
private:
double
learning_rate
;
double
lr_decay_a
;
double
lr_decay_b
;
};
...
...
paddle/optimizer/optimizer.cc
浏览文件 @
fd8c5107
...
...
@@ -37,7 +37,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
int
config_proto_len
)
{
paddle_optimizer
*
optimizer
=
new
paddle_optimizer
;
std
::
string
config
(
config_proto
,
config_proto
+
config_proto_len
);
optimizer
->
impl
=
ParameterOptimizer
::
c
reate
(
config
);
optimizer
->
impl
=
ParameterOptimizer
::
C
reate
(
config
);
return
optimizer
;
}
...
...
@@ -53,7 +53,7 @@ int paddle_update_parameter(paddle_optimizer* o,
// TOOD(zhihong): datatype not work. need to add the runtime datatype
auto
grad_type
=
reinterpret_cast
<
const
real
*>
(
grad_buffer
);
Tensor
*
gradient
=
new
Tensor
(
const_cast
<
real
*>
(
grad_type
),
num_bytes
);
o
->
impl
->
u
pdate
(
gradient
);
o
->
impl
->
U
pdate
(
gradient
);
return
PADDLE_SUCCESS
;
}
...
...
paddle/optimizer/optimizer_test.cpp
已删除
100644 → 0
浏览文件 @
5ab958ba
#include "optimizer.h"
#include "gtest/gtest.h"
template
<
class
T
>
class
Opitmizer_C_Test
:
public
testing
::
Test
{
private:
Tensor
<
T
>
parameter
;
Tensor
<
T
>
gradient
;
};
void
applyGradientDescent_TEST
()
{}
paddle/optimizer/parameter_optimizer.cc
浏览文件 @
fd8c5107
...
...
@@ -10,41 +10,40 @@
namespace
paddle
{
namespace
optimizer
{
ParameterOptimizer
*
ParameterOptimizer
::
c
reate
(
const
::
std
::
string
&
config_proto
)
{
ParameterOptimizer
*
ParameterOptimizer
::
C
reate
(
const
std
::
string
&
config_proto
)
{
paddle
::
OptimizerConfig
config
;
CHECK
(
config
.
ParseFromString
(
config_proto
)
==
0
)
<<
"
error :
optimizer config"
;
<<
"
failed parse
optimizer config"
;
auto
select_lr_policy
=
[
=
](
const
OptimizerConfig
&
config
)
->
BaseLr
*
{
std
::
string
s
(
config
.
lr_policy
());
if
(
s
==
"ConstLr"
)
return
new
ConstLr
(
config
.
const_lr
().
learning_rate
());
if
(
s
==
"LinearLr"
)
auto
select_lr_policy
=
[
=
](
const
OptimizerConfig
&
config
)
->
LrPolicy
*
{
if
(
config
.
lr_policy
()
==
OptimizerConfig
::
ConstLr
)
return
new
ConstLr
(
config
.
const_lr
().
learning_rate
());
if
(
config
.
lr_policy
()
==
OptimizerConfig
::
LinearLr
)
return
new
LinearLr
(
config
.
linear_lr
().
learning_rate
(),
config
.
linear_lr
().
lr_decay_a
(),
config
.
linear_lr
().
lr_decay_b
());
// default
return
nullptr
;
};
BaseLr
*
lr
=
select_lr_policy
(
config
);
LrPolicy
*
lr
=
select_lr_policy
(
config
);
auto
select_optimizer
=
[
=
](
const
OptimizerConfig
&
config
)
->
ParameterOptimizer
*
{
std
::
string
s
(
config
.
optimizer_name
());
if
(
s
==
"SGD"
)
{
if
(
config
.
optimizer
()
==
OptimizerConfig
::
SGD
)
{
return
new
SGDOptimizer
(
config
.
sgd
().
momentum
(),
config
.
sgd
().
decay
(),
config
.
sgd
().
nesterov
(),
lr
);
}
if
(
s
==
"Adadelta"
)
{
if
(
config
.
optimizer
()
==
OptimizerConfig
::
Adadelta
)
{
return
new
AdagradOptimizer
(
config
.
adagrad
().
epsilon
(),
config
.
adagrad
().
decay
(),
lr
);
}
if
(
s
==
"Adagrad"
)
{
if
(
config
.
optimizer
()
==
OptimizerConfig
::
Adagrad
)
{
return
new
AdagradOptimizer
(
config
.
adagrad
().
epsilon
(),
config
.
adagrad
().
decay
(),
lr
);
}
if
(
s
==
"Adam"
)
{
if
(
config
.
optimizer
()
==
OptimizerConfig
::
Adam
)
{
return
new
AdadeltaOptimizer
(
config
.
adadelta
().
rho
(),
config
.
adadelta
().
epsilon
(),
config
.
adadelta
().
decay
(),
...
...
paddle/optimizer/parameter_optimizer.h
浏览文件 @
fd8c5107
...
...
@@ -5,8 +5,8 @@
#include <functional>
#include <string>
#include "OptimizerConfig.pb.h"
#include "Tensor.h"
#include "lr_policy.h"
#include "tensor.h"
namespace
paddle
{
namespace
optimizer
{
...
...
@@ -17,21 +17,21 @@ public:
* @brief update hook for algorithm need to traverse parameter more than
* once.
*/
ParameterOptimizer
(
BaseLr
*
lr
)
:
lr_policy
(
lr
),
num_sample_passed
(
0
)
{}
ParameterOptimizer
(
LrPolicy
*
lr
)
:
lr_policy_
(
lr
),
num_sample_passed_
(
0
)
{}
virtual
~
ParameterOptimizer
()
{
delete
parameter_
;
};
static
ParameterOptimizer
*
create
(
const
::
std
::
string
&
config_proto
);
virtual
void
u
pdate
(
const
Tensor
*
gradient
)
=
0
;
static
ParameterOptimizer
*
Create
(
const
std
::
string
&
config_proto
);
virtual
void
U
pdate
(
const
Tensor
*
gradient
)
=
0
;
virtual
real
*
get_weight
()
const
;
virtual
void
set_weight
(
Tensor
*
parameter
);
p
ublic
:
p
rotected
:
OptimizerConfig
config_
;
Tensor
*
parameter_
;
// learning rate policy
BaseLr
*
lr_policy
;
uint64_t
num_sample_passed
;
LrPolicy
*
lr_policy_
;
uint64_t
num_sample_passed
_
;
};
}
// namespace optimizer
...
...
paddle/optimizer/parameter_optimizer_test.cpp
浏览文件 @
fd8c5107
...
...
@@ -42,28 +42,28 @@ public:
virtual
void
TearDown
()
{}
void
create_sgd
()
{
config
.
set_optimizer
_name
(
"SGD"
);
config
.
set_optimizer
(
OptimizerConfig
::
SGD
);
config
.
mutable_sgd
()
->
set_momentum
(
0.0
);
config
.
mutable_sgd
()
->
set_decay
(
0.0
);
config
.
mutable_sgd
()
->
set_nesterov
(
false
);
config
.
set_lr_policy
(
"ConstLr"
);
config
.
set_lr_policy
(
OptimizerConfig
::
ConstLr
);
config
.
mutable_const_lr
()
->
set_learning_rate
(
0.1
);
ParameterOptimizer
*
opt
=
ParameterOptimizer
::
c
reate
(
config
.
SerializeAsString
());
ParameterOptimizer
::
C
reate
(
config
.
SerializeAsString
());
opts
.
push_back
(
opt
);
}
void
create_adam
()
{
config
.
set_optimizer
_name
(
"Adam"
);
config
.
set_optimizer
(
OptimizerConfig
::
Adam
);
config
.
mutable_adam
()
->
set_beta_1
(
0.9
);
config
.
mutable_adam
()
->
set_beta_2
(
0.1
);
config
.
mutable_adam
()
->
set_epsilon
(
1e-3
);
config
.
mutable_adam
()
->
set_decay
(
0.0
);
config
.
set_lr_policy
(
"ConstLr"
);
config
.
set_lr_policy
(
OptimizerConfig
::
ConstLr
);
config
.
mutable_const_lr
()
->
set_learning_rate
(
0.1
);
ParameterOptimizer
*
opt
=
ParameterOptimizer
::
c
reate
(
config
.
SerializeAsString
());
ParameterOptimizer
::
C
reate
(
config
.
SerializeAsString
());
opts
.
push_back
(
opt
);
}
void
test_set_weight
()
{
...
...
@@ -88,7 +88,7 @@ public:
void
test_update
()
{
Tensor
*
g
=
fix_n_Tensor
(
size
);
for
(
size_t
i
=
0
;
i
<
opts
.
size
();
++
i
)
{
opts
[
i
]
->
u
pdate
(
g
);
opts
[
i
]
->
U
pdate
(
g
);
}
}
...
...
paddle/optimizer/sgd_optimizer.h
浏览文件 @
fd8c5107
...
...
@@ -8,23 +8,19 @@ namespace optimizer {
class
SGDOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
::
parameter_
;
using
ParameterOptimizer
::
num_sample_passed
;
using
ParameterOptimizer
::
lr_policy
;
SGDOptimizer
(
double
m
,
double
d
,
bool
n
,
BaseLr
*
lr
)
:
ParameterOptimizer
(
lr
),
momentum
(
m
),
decay
(
d
),
nesterov
(
n
)
{}
SGDOptimizer
(
double
m
,
double
d
,
bool
n
,
LrPolicy
*
lr
)
:
ParameterOptimizer
(
lr
),
momentum_
(
m
),
decay_
(
d
),
nesterov_
(
n
)
{}
virtual
~
SGDOptimizer
()
{
delete
momentums_
;
}
void
u
pdate
(
const
Tensor
*
gradient
);
void
U
pdate
(
const
Tensor
*
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
*
momentums_
;
double
momentum
;
double
decay
;
bool
nesterov
;
double
momentum
_
;
double
decay
_
;
bool
nesterov
_
;
};
}
// namespace optimizer
...
...
paddle/optimizer/sgd_optmizer.cc
浏览文件 @
fd8c5107
...
...
@@ -5,31 +5,32 @@ namespace optimizer {
void
SGDOptimizer
::
set_weight
(
Tensor
*
p
)
{
// ParameterOptimizer::set_weight(p);
parameter_
=
p
;
size_t
size
=
p
->
size
();
// TODO: fix it with align aware allocator bind to Tensor
if
(
momentum
!=
0.0
)
{
if
(
momentum
_
!=
0.0
)
{
real
*
ptr
=
new
real
[
size
];
momentums_
=
new
Tensor
(
ptr
,
size
);
}
}
void
SGDOptimizer
::
u
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
void
SGDOptimizer
::
U
pdate
(
const
Tensor
*
gradient
)
{
num_sample_passed
_
+=
1
;
double
learning_rate
=
lr_policy
_
->
LearningRate
(
num_sample_passed_
);
real
velocity
=
0.0
;
Tensor
&
param
=
*
parameter_
;
const
Tensor
&
grad
=
*
gradient
;
Tensor
&
m
=
*
momentums_
;
for
(
size_t
i
=
0
;
i
<
param
.
size
();
++
i
)
{
if
(
momentum
==
0.0
)
{
velocity
=
-
learning_rate
*
grad
[
i
]
-
learning_rate
*
decay
*
param
[
i
];
if
(
momentum
_
==
0.0
)
{
velocity
=
-
learning_rate
*
grad
[
i
]
-
learning_rate
*
decay
_
*
param
[
i
];
}
else
{
m
[
i
]
=
momentum
*
m
[
i
]
-
learning_rate
*
grad
[
i
]
-
learning_rate
*
decay
*
param
[
i
];
m
[
i
]
=
momentum
_
*
m
[
i
]
-
learning_rate
*
grad
[
i
]
-
learning_rate
*
decay
_
*
param
[
i
];
velocity
=
m
[
i
];
}
if
(
nesterov
)
{
param
[
i
]
+=
momentum
*
velocity
-
learning_rate
*
grad
[
i
];
if
(
nesterov
_
)
{
param
[
i
]
+=
momentum
_
*
velocity
-
learning_rate
*
grad
[
i
];
}
else
{
param
[
i
]
+=
velocity
;
}
...
...
proto/OptimizerConfig.proto
浏览文件 @
fd8c5107
...
...
@@ -54,38 +54,40 @@ message AdamConfig {
message
ConstLr
{
// learninRate Policy
required
double
learning_rate
=
40
[
default
=
1.0
];
required
double
learning_rate
=
1
[
default
=
1.0
];
}
message
LinearLr
{
// learninRate Policy
required
double
learning_rate
=
40
[
default
=
1.0
];
optional
double
lr_decay_a
=
2
5
;
optional
double
lr_decay_b
=
26
;
required
double
learning_rate
=
1
[
default
=
1.0
];
optional
double
lr_decay_a
=
2
;
optional
double
lr_decay_b
=
3
;
}
message
OptimizerConfig
{
// common config of optimizer
// algorithm config, type : string
// SGD = 1;
// Adadelta = 2;
// Adagrad = 3;
// Adam = 4;
required
string
optimizer_name
=
1
;
enum
Optimizer
{
SGD
=
1
;
Adadelta
=
2
;
Adagrad
=
3
;
Adam
=
4
;
}
required
Optimizer
optimizer
=
1
;
optional
SGDConfig
sgd
=
3
;
optional
AdadeltaConfig
adadelta
=
4
;
optional
AdagradConfig
adagrad
=
5
;
optional
AdamConfig
adam
=
6
;
// learning rate runtime policy config
// lr_policy , type : string
// ConstLr = 0;
// LinearLr = 1;
required
string
lr_policy
=
11
;
enum
LrPolicy
{
ConstLr
=
0
;
LinearLr
=
1
;
}
required
LrPolicy
lr_policy
=
11
;
optional
ConstLr
const_lr
=
12
;
optional
LinearLr
linear_lr
=
15
;
optional
uint64
num_sample_passed
=
13
[
default
=
0
];
optional
LinearLr
linear_lr
=
13
;
// common config of optimizer
optional
double
clipnorm
=
101
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录