Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5b8a0c5d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5b8a0c5d
编写于
6月 05, 2017
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"optimizer remove init create with proto"
上级
3158efe9
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
130 addition
and
112 deletion
+130
-112
paddle/optimizer/CMakeLists.txt
paddle/optimizer/CMakeLists.txt
+6
-6
paddle/optimizer/Tensor.h
paddle/optimizer/Tensor.h
+7
-2
paddle/optimizer/adadelta_optimizer.cc
paddle/optimizer/adadelta_optimizer.cc
+6
-12
paddle/optimizer/adadelta_optimizer.h
paddle/optimizer/adadelta_optimizer.h
+6
-1
paddle/optimizer/adagrad_optimizer.cc
paddle/optimizer/adagrad_optimizer.cc
+0
-5
paddle/optimizer/adagrad_optimizer.h
paddle/optimizer/adagrad_optimizer.h
+5
-1
paddle/optimizer/adam_optimizer.cc
paddle/optimizer/adam_optimizer.cc
+10
-13
paddle/optimizer/adam_optimizer.h
paddle/optimizer/adam_optimizer.h
+14
-2
paddle/optimizer/lr_policy.h
paddle/optimizer/lr_policy.h
+4
-5
paddle/optimizer/optimizer.cc
paddle/optimizer/optimizer.cc
+12
-14
paddle/optimizer/optimizer.h
paddle/optimizer/optimizer.h
+2
-2
paddle/optimizer/parameter_optimizer.cc
paddle/optimizer/parameter_optimizer.cc
+29
-13
paddle/optimizer/parameter_optimizer.h
paddle/optimizer/parameter_optimizer.h
+12
-12
paddle/optimizer/regularizer.cc
paddle/optimizer/regularizer.cc
+2
-0
paddle/optimizer/sgd_optimizer.h
paddle/optimizer/sgd_optimizer.h
+12
-3
paddle/optimizer/sgd_optmizer.cc
paddle/optimizer/sgd_optmizer.cc
+2
-19
proto/OptimizerConfig.proto
proto/OptimizerConfig.proto
+1
-2
未找到文件。
paddle/optimizer/CMakeLists.txt
浏览文件 @
5b8a0c5d
include_directories
(
${
CMAKE_CURRENT_BINARY_DIR
}
)
include_directories
(
${
CMAKE_CURRENT_BINARY_DIR
}
)
set
(
OPITMIZER_SRCS
set
(
OPITMIZER_SRCS
adadelta_optimizer.cc
#
adadelta_optimizer.cc
adagrad_optimizer.cc
#
adagrad_optimizer.cc
adam_optimizer.cc
#
adam_optimizer.cc
optimizer.cc
optimizer.cc
parameter_optimizer.cc
parameter_optimizer.cc
sgd_optmizer.cc
sgd_optmizer.cc
...
@@ -11,9 +11,9 @@ set(OPITMIZER_SRCS
...
@@ -11,9 +11,9 @@ set(OPITMIZER_SRCS
)
)
set
(
OPITMIZER_Headers
set
(
OPITMIZER_Headers
adadelta_optimizer.h
#
adadelta_optimizer.h
adagrad_optimizer.h
#
adagrad_optimizer.h
adam_optimizer.h
#
adam_optimizer.h
lr_policy.h
lr_policy.h
optimizer.h
optimizer.h
parameter_optimizer.h
parameter_optimizer.h
...
...
paddle/optimizer/Tensor.h
浏览文件 @
5b8a0c5d
...
@@ -5,6 +5,7 @@
...
@@ -5,6 +5,7 @@
*/
*/
#include <string.h>
#include <string.h>
#include "optimizer.h"
#include "paddle/math/BaseMatrix.h"
#include "paddle/math/BaseMatrix.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -16,10 +17,14 @@ using TensorBase = BaseMatrixT<T>;
...
@@ -16,10 +17,14 @@ using TensorBase = BaseMatrixT<T>;
template
<
class
T
>
template
<
class
T
>
class
Tensor
:
public
TensorBase
<
T
>
{
class
Tensor
:
public
TensorBase
<
T
>
{
public:
public:
Tensor
(
T
*
data
,
int
size
)
:
TensorBase
<
T
>
(
size
,
1
,
0
,
data
,
false
,
false
)
{}
Tensor
(
T
*
data
,
int
size
)
:
TensorBase
<
T
>
(
1
,
size
,
0
,
data
,
false
,
false
)
{}
T
*
get_buffer
()
{
return
this
->
data_
;
}
T
*
get_buffer
()
{
return
this
->
data_
;
}
T
&
operator
[](
const
int
idx
)
{
CHECK
(
idx
>=
0
&&
idx
<
this
->
width_
)
<<
" out of index range"
;
return
this
->
data_
[
idx
];
}
// TODO: replace with tensorshape
// TODO: replace with tensorshape
size_t
width
()
{
return
this
->
width_
;
}
size_t
size
()
const
{
return
this
->
width_
;
}
};
};
}
// namespace optimizer
}
// namespace optimizer
...
...
paddle/optimizer/adadelta_optimizer.cc
浏览文件 @
5b8a0c5d
...
@@ -3,21 +3,14 @@
...
@@ -3,21 +3,14 @@
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
template
<
class
T
>
AdadeltaOptimizer
<
T
>::
AdadeltaOptimizer
(
const
::
paddle
::
OptimizerConfig
&
config
)
:
ParameterOptimizer
<
T
>
(
config
)
{
rho
=
config
.
adadelta
().
rho
();
epsilon
=
config
.
adadelta
().
epsilon
();
decay
=
config
.
adadelta
().
decay
();
}
template
<
class
T
>
template
<
class
T
>
void
AdadeltaOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
void
AdadeltaOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
size_t
size
=
p
->
width
();
size_t
size
=
p
->
size
();
T
*
gptr
=
new
T
[
size
];
T
*
gptr
=
new
T
[
size
];
accum_gradient
=
Tensor
<
T
>
(
gptr
,
size
);
accum_gradient
=
Tensor
<
T
>
(
gptr
,
size
);
T
*
dptr
=
new
T
[
size
];
T
*
dptr
=
new
T
[
size
];
accum_delta
=
Tensor
<
T
>
(
d
tp
r
,
size
);
accum_delta
=
Tensor
<
T
>
(
d
pt
r
,
size
);
T
*
dptr_current
=
new
T
[
size
];
T
*
dptr_current
=
new
T
[
size
];
update_delta
=
Tensor
<
T
>
(
dptr_current
,
size
);
update_delta
=
Tensor
<
T
>
(
dptr_current
,
size
);
}
}
...
@@ -25,8 +18,8 @@ void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
...
@@ -25,8 +18,8 @@ void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
template
<
class
T
>
template
<
class
T
>
void
AdadeltaOptimizer
<
T
>::
update
(
const
Tensor
<
T
>&
gradient
)
{
void
AdadeltaOptimizer
<
T
>::
update
(
const
Tensor
<
T
>&
gradient
)
{
num_sample_passed
+=
1
;
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
();
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
for
(
size_t
i
=
0
;
i
<
parameter_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
accum_gradient
[
i
]
=
accum_gradient
[
i
]
=
rho
*
accum_gradient
[
i
]
+
(
1.0
-
rho
)
*
gradient
[
i
]
*
gradient
[
i
];
rho
*
accum_gradient
[
i
]
+
(
1.0
-
rho
)
*
gradient
[
i
]
*
gradient
[
i
];
...
@@ -36,7 +29,8 @@ void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
...
@@ -36,7 +29,8 @@ void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
accum_delta
[
i
]
=
accum_delta
[
i
]
=
rho
*
accum_delta
[
i
]
+
(
1.0
-
rho
)
*
update_delta
[
i
]
*
update_delta
[
i
];
rho
*
accum_delta
[
i
]
+
(
1.0
-
rho
)
*
update_delta
[
i
]
*
update_delta
[
i
];
parameter_
[
i
]
-=
update_delta
[
i
]
+
decay
*
parameter_
[
i
];
parameter_
[
i
]
-=
learning_rate
*
update_delta
[
i
]
+
learning_rate
*
decay
*
parameter_
[
i
];
}
}
}
}
...
...
paddle/optimizer/adadelta_optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -9,7 +9,12 @@ namespace optimizer {
...
@@ -9,7 +9,12 @@ namespace optimizer {
template
<
class
T
>
template
<
class
T
>
class
AdadeltaOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdadeltaOptimizer
:
public
ParameterOptimizer
<
T
>
{
public:
public:
AdadeltaOptimizer
(
const
OptimizerConfig
&
config
);
using
ParameterOptimizer
<
T
>::
parameter_
;
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
AdadeltaOptimizer
(
double
rho
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
rho
(
rho
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
~
AdadeltaOptimizer
()
{
~
AdadeltaOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_delta
)
delete
accum_delta
;
if
(
accum_delta
)
delete
accum_delta
;
...
...
paddle/optimizer/adagrad_optimizer.cc
浏览文件 @
5b8a0c5d
...
@@ -3,11 +3,6 @@
...
@@ -3,11 +3,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
template
<
class
T
>
template
<
class
T
>
AdagradOptimizer
<
T
>::
AdagradOptimizer
(
const
::
paddle
::
OptimizerConfig
&
config
)
:
ParameterOptimizer
<
T
>
(
config
)
{
epsilon
=
config
.
adagrad
().
epsilon
();
decay
=
config
.
adagrad
().
decay
();
}
template
<
class
T
>
template
<
class
T
>
void
AdagradOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
void
AdagradOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
...
...
paddle/optimizer/adagrad_optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -9,7 +9,11 @@ namespace optimizer {
...
@@ -9,7 +9,11 @@ namespace optimizer {
template
<
class
T
>
template
<
class
T
>
class
AdagradOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdagradOptimizer
:
public
ParameterOptimizer
<
T
>
{
public:
public:
AdagradOptimizer
(
const
OptimizerConfig
&
config
);
using
ParameterOptimizer
<
T
>::
parameter_
;
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
AdagradOptimizer
(
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
~
AdagradOptimizer
()
{
~
AdagradOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_gradient
)
delete
accum_gradient
;
}
}
...
...
paddle/optimizer/adam_optimizer.cc
浏览文件 @
5b8a0c5d
...
@@ -2,14 +2,6 @@
...
@@ -2,14 +2,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
template
<
class
T
>
AdamOptimizer
<
T
>::
AdamOptimizer
(
const
::
paddle
::
OptimizerConfig
&
config
)
:
ParameterOptimizer
<
T
>
(
config
)
{
beta_1
=
config
.
adam
().
beta_1
();
beta_2
=
config
.
adam
().
beta_2
();
epsilon
=
config
.
adam
().
epsilon
();
decay
=
config
.
adam
().
decay
();
}
template
<
class
T
>
template
<
class
T
>
void
AdamOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
void
AdamOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
...
@@ -23,11 +15,16 @@ void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
...
@@ -23,11 +15,16 @@ void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
template
<
class
T
>
template
<
class
T
>
void
AdamOptimizer
<
T
>::
update
(
const
Tensor
<
T
>
&
gradient
)
{
void
AdamOptimizer
<
T
>::
update
(
const
Tensor
<
T
>
&
gradient
)
{
num_sample_passed
+=
1
;
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
();
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
for
(
size_t
i
=
0
;
i
<
parameter_
.
size
();
++
i
)
{
double
coef1
=
1.0
-
std
::
pow
(
beta_1
,
num_sample_passed
);
accum_gradient
[
i
]
+=
gradient
[
i
]
*
gradient
[
i
];
double
coef2
=
1.0
-
std
::
pow
(
beta_2
,
num_sample_passed
);
parameter_
[
i
]
+=
learning_rate
*=
std
::
sqrt
(
coef2
)
/
coef1
;
learning_rate
*
(
gradient
[
i
]
/
std
::
sqrt
(
accum_gradient
[
i
]
+
epsilon
)
+
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
momentums_
[
i
]
=
beta_1
*
momentums_
[
i
]
+
(
1.0
-
beta_1
)
*
gradient
[
i
];
velocitys_
[
i
]
=
beta_2
*
velocitys_
[
i
]
+
(
1.0
-
beta_2
)
*
gradient
[
i
]
*
gradient
[
i
];
parameter_
[
i
]
-=
learning_rate
*
(
momentums_
[
i
]
/
std
::
sqrt
(
velocitys_
[
i
]
+
epsilon
)
+
decay
*
parameter_
[
i
]);
decay
*
parameter_
[
i
]);
}
}
}
}
...
...
paddle/optimizer/adam_optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -9,8 +9,20 @@ namespace optimizer {
...
@@ -9,8 +9,20 @@ namespace optimizer {
template
<
class
T
>
template
<
class
T
>
class
AdamOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdamOptimizer
:
public
ParameterOptimizer
<
T
>
{
public:
public:
AdamOptimizer
(
const
OptimizerConfig
&
config
);
using
ParameterOptimizer
<
T
>::
parameter_
;
~
AdamOptimizer
()
{}
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
AdamOptimizer
(
double
beta_1
,
double
beta_2
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
beta_1
(
beta_1
),
beta_2
(
beta_2
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
~
AdamOptimizer
()
{
if
(
momentums_
)
delete
momentums_
;
if
(
velocitys_
)
delete
velocitys_
;
}
void
update
(
const
Tensor
<
T
>
&
gradient
);
void
update
(
const
Tensor
<
T
>
&
gradient
);
void
set_weight
(
const
Tensor
<
T
>
*
p
);
void
set_weight
(
const
Tensor
<
T
>
*
p
);
T
*
get_weight
()
const
;
T
*
get_weight
()
const
;
...
...
paddle/optimizer/lr_policy.h
浏览文件 @
5b8a0c5d
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
#include "OptimizerConfig.p
h
.h"
#include "OptimizerConfig.p
b
.h"
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
class
BaseLr
{
class
BaseLr
{
public:
public:
LrPolicyBase
(
const
OpitmizerConfig
&
config
)
{
BaseLr
(
double
lr
)
:
learning_rate
(
lr
)
{}
learning_rate
=
config
.
lr_config
().
learning_rate
();
virtual
~
BaseLr
()
{}
}
virtual
double
get_learning_rate
(
const
uint64_t
num_sample_passed
)
=
0
;
virtual
double
get_learning_rate
(
const
uint64_t
num_sample_passed
)
=
0
;
pr
ivate
:
pr
otected
:
double
learning_rate
;
double
learning_rate
;
};
};
...
...
paddle/optimizer/optimizer.cc
浏览文件 @
5b8a0c5d
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "parameter_optimizer.h"
#include "parameter_optimizer.h"
template
<
class
T
>
template
<
paddle_element_type
T
>
struct
EnumToType
{};
struct
EnumToType
{};
template
<
class
T
>
template
<
class
T
>
...
@@ -11,15 +11,14 @@ struct TypeToEnum {};
...
@@ -11,15 +11,14 @@ struct TypeToEnum {};
#define MATCH_ENUM_TYPE(TYPE, ENUM) \
#define MATCH_ENUM_TYPE(TYPE, ENUM) \
template <> \
template <> \
struct TypeToEnum<
ENUM
> { \
struct TypeToEnum<
TYPE
> { \
static paddle_element_type v() { return ENUM; }; \
static paddle_element_type v() { return ENUM; }; \
static constexpr TYPE value = ENUM;
static constexpr TYPE value = ENUM; \
}
}; \
;
template <> \
template
<
>
struct EnumToType<ENUM> { \
struct
EnumToType
<
ENUM
>
{
typedef TYPE Type; \
typedef
TYPE
Type
;
}
}
MATCH_ENUM_TYPE
(
int32_t
,
PADDLE_ELEMENT_TYPE_INT32
);
MATCH_ENUM_TYPE
(
int32_t
,
PADDLE_ELEMENT_TYPE_INT32
);
MATCH_ENUM_TYPE
(
uint32_t
,
PADDLE_ELEMENT_TYPE_UINT32
);
MATCH_ENUM_TYPE
(
uint32_t
,
PADDLE_ELEMENT_TYPE_UINT32
);
...
@@ -27,11 +26,10 @@ MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
...
@@ -27,11 +26,10 @@ MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE
(
uint64_t
,
PADDLE_ELEMENT_TYPE_UINT64
);
MATCH_ENUM_TYPE
(
uint64_t
,
PADDLE_ELEMENT_TYPE_UINT64
);
MATCH_ENUM_TYPE
(
float
,
PADDLE_ELEMENT_TYPE_FLOAT32
);
MATCH_ENUM_TYPE
(
float
,
PADDLE_ELEMENT_TYPE_FLOAT32
);
MATCH_ENUM_TYPE
(
double
,
PADDLE_ELEMENT_TYPE_FLOAT64
);
MATCH_ENUM_TYPE
(
double
,
PADDLE_ELEMENT_TYPE_FLOAT64
);
struct
paddle_optimizer
{
struct
paddle_optimizer
{
/*! \brief optmizer in C++ side */
/*! \brief optmizer in C++ side */
paddle
::
optimizer
::
ParameterOptim
zier
*
impl
;
paddle
::
optimizer
::
ParameterOptim
izerBase
*
impl
;
};
};
paddle_optimizer
*
paddle_create_optimizer
(
const
unsigned
char
*
config_proto
,
paddle_optimizer
*
paddle_create_optimizer
(
const
unsigned
char
*
config_proto
,
...
@@ -48,7 +46,7 @@ int paddle_release_optimizer(paddle_optimizer* o) {
...
@@ -48,7 +46,7 @@ int paddle_release_optimizer(paddle_optimizer* o) {
}
}
int
paddle_update_parameter
(
paddle_optimizer
*
o
,
int
paddle_update_parameter
(
paddle_optimizer
*
o
,
paddle_element_type
data_type
,
const
paddle_element_type
data_type
,
const
void
*
grad_buffer
,
const
void
*
grad_buffer
,
int
num_bytes
)
{
int
num_bytes
)
{
auto
type
=
EnumToType
<
data_type
>::
Type
;
auto
type
=
EnumToType
<
data_type
>::
Type
;
...
@@ -59,7 +57,7 @@ int paddle_update_parameter(paddle_optimizer* o,
...
@@ -59,7 +57,7 @@ int paddle_update_parameter(paddle_optimizer* o,
}
}
int
paddle_optimizer_set_weights
(
paddle_optimizer
*
o
,
int
paddle_optimizer_set_weights
(
paddle_optimizer
*
o
,
paddle_element_type
data_type
,
const
paddle_element_type
data_type
,
void
*
param_buffer
,
void
*
param_buffer
,
int
num_bytes
)
{
int
num_bytes
)
{
auto
type
=
EnumToType
<
data_type
>::
Type
;
auto
type
=
EnumToType
<
data_type
>::
Type
;
...
...
paddle/optimizer/optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -64,7 +64,7 @@ int paddle_release_optimizer(paddle_optimizer* o);
...
@@ -64,7 +64,7 @@ int paddle_release_optimizer(paddle_optimizer* o);
* @return return exec status
* @return return exec status
*/
*/
int
paddle_update_parameter
(
paddle_optimizer
*
o
,
int
paddle_update_parameter
(
paddle_optimizer
*
o
,
paddle_element_type
data_type
,
const
paddle_element_type
data_type
,
const
void
*
gradient
,
const
void
*
gradient
,
int
num_bytes
);
int
num_bytes
);
...
@@ -76,7 +76,7 @@ int paddle_update_parameter(paddle_optimizer* o,
...
@@ -76,7 +76,7 @@ int paddle_update_parameter(paddle_optimizer* o,
* @return return exec status
* @return return exec status
*/
*/
int
paddle_optimizer_set_weights
(
paddle_optimizer
*
o
,
int
paddle_optimizer_set_weights
(
paddle_optimizer
*
o
,
paddle_element_type
data_type
,
const
paddle_element_type
data_type
,
void
*
param_buffer
,
void
*
param_buffer
,
int
num_bytes
);
int
num_bytes
);
...
...
paddle/optimizer/parameter_optimizer.cc
浏览文件 @
5b8a0c5d
#include "parameter_optimizer.h"
#include <glog/logging.h>
#include <glog/logging.h>
#include "optimizer_factory.h"
#include "adadelta_optimizer.h"
#include "adagrad_optimizer.h"
#include "adam_optimizer.h"
#include "lr_policy.h"
#include "sgd_optimizer.h"
#include "parameter_optimizer.h"
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
...
@@ -12,29 +17,40 @@ ParameterOptimizer<T> *ParameterOptimizer<T>::create(
...
@@ -12,29 +17,40 @@ ParameterOptimizer<T> *ParameterOptimizer<T>::create(
CHECK
(
config
.
ParseFromString
(
config_proto
)
==
0
)
CHECK
(
config
.
ParseFromString
(
config_proto
)
==
0
)
<<
"error : optimizer config"
;
<<
"error : optimizer config"
;
CHECK
(
config_valid
(
config
)
==
0
)
<<
"error : invalid optimizer config "
;
CHECK
(
config_valid
(
config
)
==
0
)
<<
"error : invalid optimizer config "
;
BaseLr
*
lr
=
nullptr
;
switch
(
config
.
lr_policy
())
{
case
"ConstLr"
:
lr
=
new
ConstLr
(
config
.
lr_config
().
learning_rate
());
break
;
}
ParameterOptimizer
<
T
>
*
opt
=
nullptr
;
ParameterOptimizer
<
T
>
*
opt
=
nullptr
;
switch
(
config
.
optimizer_name
())
{
switch
(
config
.
optimizer_name
())
{
case
"SGD"
:
case
"SGD"
:
opt
=
new
SGDOptimizer
<
T
>
(
config
);
opt
=
new
SGDOptimizer
<
T
>
(
config
.
sgd
().
momentum
(),
config
.
sgd
().
decay
(),
config
.
sgd
().
nesterov
(),
lr
);
break
;
break
;
case
"Adagrad"
:
case
"Adagrad"
:
opt
=
new
AdagradOptimizer
<
T
>
(
config
);
opt
=
new
AdagradOptimizer
<
T
>
(
config
.
adagrad
().
epsilon
(),
config
.
adagrad
().
decay
(),
lr
);
break
;
break
;
case
"Adadelta"
:
case
"Adadelta"
:
opt
=
new
AdadeltaOptimizer
<
T
>
(
config
);
opt
=
new
AdadeltaOptimizer
<
T
>
(
config
.
adadelta
().
rho
(),
config
.
adadelta
().
epsilon
(),
config
.
adadelta
().
decay
(),
lr
);
break
;
break
;
case
"Adam"
:
case
"Adam"
:
opt
=
new
AdamOptimizer
<
T
>
(
config
);
opt
=
new
AdamOptimizer
<
T
>
(
config
.
adam
().
beta_1
(),
config
.
adam
().
beta_2
(),
config
.
adam
().
epsilon
(),
config
.
adam
().
decay
(),
lr
);
break
;
break
;
default:
opt
=
new
SGDOptimizer
<
T
>
(
config
);
}
}
switch
(
config
.
lr_policy
())
{
case
"ConstLr"
:
opt
.
lr_policy
=
new
ConstLr
(
config
);
break
;
}
return
opt
;
return
opt
;
}
}
...
...
paddle/optimizer/parameter_optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -11,6 +11,12 @@
...
@@ -11,6 +11,12 @@
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
class
ParameterOptimizerBase
{
private:
ParameterOptimizerBase
(
const
ParameterOptimizerBase
&
)
=
delete
;
ParameterOptimizerBase
&
operator
=
(
const
ParameterOptimizerBase
&
)
=
delete
;
};
template
<
class
T
>
template
<
class
T
>
class
ParameterOptimizer
{
class
ParameterOptimizer
{
public:
public:
...
@@ -18,18 +24,18 @@ public:
...
@@ -18,18 +24,18 @@ public:
* @brief update hook for algorithm need to traverse parameter more than
* @brief update hook for algorithm need to traverse parameter more than
* once.
* once.
*/
*/
// use config for pack trainig state
ParameterOptimizer
(
const
OptimizerConfig
&
config
)
:
config_
(
config
){};
ParameterOptimizer
(
const
OptimizerConfig
&
config
)
:
config_
(
config
){};
ParameterOptimizer
(
BaseLr
*
lr
)
:
lr_policy
(
lr
),
num_sample_passed
(
0
)
{}
virtual
~
ParameterOptimizer
()
{
delete
parameter_
;
};
static
ParameterOptimizer
*
create
(
const
::
std
::
string
&
config_proto
);
static
ParameterOptimizer
*
create
(
const
::
std
::
string
&
config_proto
);
virtual
void
update
(
const
Tensor
&
gradient
)
=
0
;
virtual
void
update
(
const
Tensor
<
T
>
&
gradient
)
=
0
;
virtual
void
destroy
()
=
0
;
virtual
T
*
get_weight
()
const
;
virtual
T
*
get_weight
()
const
;
virtual
void
set_weight
(
const
Tensor
<
T
>
*
parameter
);
virtual
void
set_weight
(
const
Tensor
<
T
>
*
parameter
);
// package optimizer config proto in runtime for saving checkpoint
virtual
char
*
get_config_proto
();
~
ParameterOptimzier
()
{
delete
parameter_
;
}
p
rivate
:
p
ublic
:
bool
config_valid
(
::
std
::
string
&
config
)
const
;
bool
config_valid
(
::
std
::
string
&
config
)
const
;
OptimizerConfig
config_
;
OptimizerConfig
config_
;
Tensor
<
T
>
*
parameter_
;
Tensor
<
T
>
*
parameter_
;
...
@@ -37,12 +43,6 @@ private:
...
@@ -37,12 +43,6 @@ private:
// learning rate policy
// learning rate policy
BaseLr
*
lr_policy
;
BaseLr
*
lr_policy
;
uint64_t
num_sample_passed
;
uint64_t
num_sample_passed
;
ParameterOptimizer
(
const
ParameterOptimizer
&
)
=
delete
;
ParameterOptimizer
&
operator
=
(
const
ParameterOptimizer
&
)
=
delete
;
/**
* @brief indicate if use L1, L2 regularizer
*/
};
};
}
// namespace optimizer
}
// namespace optimizer
...
...
paddle/optimizer/regularizer.cc
浏览文件 @
5b8a0c5d
...
@@ -19,6 +19,8 @@ Regularizer<T>* Regularizer<T>::create(const std::string& config) {
...
@@ -19,6 +19,8 @@ Regularizer<T>* Regularizer<T>::create(const std::string& config) {
template
class
L1Regularizer
<
float
>;
template
class
L1Regularizer
<
float
>;
template
class
L1Regularizer
<
double
>;
template
class
L1Regularizer
<
double
>;
template
class
L2Regularizer
<
float
>;
template
class
L2Regularizer
<
double
>;
}
// namespace optimizer
}
// namespace optimizer
}
// namespace paddle
}
// namespace paddle
paddle/optimizer/sgd_optimizer.h
浏览文件 @
5b8a0c5d
...
@@ -9,8 +9,18 @@ namespace optimizer {
...
@@ -9,8 +9,18 @@ namespace optimizer {
template
<
class
T
>
template
<
class
T
>
class
SGDOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
SGDOptimizer
:
public
ParameterOptimizer
<
T
>
{
public:
public:
SGDOptimizer
(
const
::
paddle
::
OptimizerConfig
&
config
);
using
ParameterOptimizer
<
T
>::
parameter_
;
~
SGDOptimizer
()
{
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
SGDOptimizer
(
double
m
,
double
d
,
bool
n
,
double
learning_rate
,
uint64_t
num_sample_passed
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
momentum
(
m
),
decay
(
d
),
nesterov
(
n
)
{}
virtual
~
SGDOptimizer
()
{
// clear memory by Tensor library
// clear memory by Tensor library
delete
momentums_
;
delete
momentums_
;
}
}
...
@@ -18,7 +28,6 @@ public:
...
@@ -18,7 +28,6 @@ public:
void
set_weight
(
const
Tensor
<
T
>*
p
);
void
set_weight
(
const
Tensor
<
T
>*
p
);
T
*
get_weight
()
const
;
T
*
get_weight
()
const
;
char
*
get_config_proto
();
private:
private:
Tensor
<
T
>*
momentums_
;
Tensor
<
T
>*
momentums_
;
...
...
paddle/optimizer/sgd_optmizer.cc
浏览文件 @
5b8a0c5d
...
@@ -3,18 +3,10 @@
...
@@ -3,18 +3,10 @@
namespace
paddle
{
namespace
paddle
{
namespace
optimizer
{
namespace
optimizer
{
template
<
class
T
>
SGDOptimizer
<
T
>::
SGDOptimizer
(
const
::
paddle
::
OptimizerConfig
&
config
)
:
ParameterOptimizer
<
T
>
(
config
)
{
momentum
=
config
.
sgd
().
momentum
();
decay
=
config
.
sgd
().
decay
();
nesterov
=
config
.
sgd
().
nesterov
();
}
template
<
class
T
>
template
<
class
T
>
void
SGDOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
void
SGDOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
// ParameterOptimizer::set_weight(p);
// ParameterOptimizer::set_weight(p);
size_t
size
=
p
->
width
();
size_t
size
=
p
->
size
();
// TODO: fix it with align aware allocator bind to Tensor
// TODO: fix it with align aware allocator bind to Tensor
if
(
momentum
!=
0.0
)
{
if
(
momentum
!=
0.0
)
{
T
*
ptr
=
new
T
[
size
];
T
*
ptr
=
new
T
[
size
];
...
@@ -27,7 +19,7 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
...
@@ -27,7 +19,7 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
num_sample_passed
+=
1
;
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
double
velocity
=
0.0
;
double
velocity
=
0.0
;
for
(
size_t
i
=
0
;
i
<
parameter_
.
size
();
++
i
)
{
Tensor
<
T
>
&
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
if
(
momentum
==
0.0
)
{
if
(
momentum
==
0.0
)
{
velocity
=
velocity
=
-
learning_rate
*
gradient
[
i
]
-
learning_rate
*
decay
*
parameter_
[
i
];
-
learning_rate
*
gradient
[
i
]
-
learning_rate
*
decay
*
parameter_
[
i
];
...
@@ -44,15 +36,6 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
...
@@ -44,15 +36,6 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
}
}
}
}
template
<
class
T
>
char
*
SGDOptimizer
<
T
>::
get_config_proto
()
{
ParameterOptimizer
::
get_config_proto
();
config
.
set_learning_rate
(
learning_rate
);
config
.
set_decay
(
decay
);
config
.
set_nesterov
(
nesterov
);
return
config
.
SerializeAsString
().
c_str
();
}
template
class
SGDOptimizer
<
float
>;
template
class
SGDOptimizer
<
float
>;
template
class
SGDOptimizer
<
double
>;
template
class
SGDOptimizer
<
double
>;
...
...
proto/OptimizerConfig.proto
浏览文件 @
5b8a0c5d
...
@@ -12,7 +12,7 @@ message SGDConfig {
...
@@ -12,7 +12,7 @@ message SGDConfig {
optional
double
momentum
=
21
[
default
=
0.0
];
optional
double
momentum
=
21
[
default
=
0.0
];
optional
double
decay
=
23
[
default
=
0.0
];
optional
double
decay
=
23
[
default
=
0.0
];
optional
bool
nesterov
=
24
[
default
=
false
];
optional
bool
nesterov
=
24
[
default
=
false
];
}
message
AdadeltaConfig
{
message
AdadeltaConfig
{
...
@@ -95,5 +95,4 @@ message OptimizerConfig {
...
@@ -95,5 +95,4 @@ message OptimizerConfig {
// common config of optimizer
// common config of optimizer
optional
double
clipnorm
=
101
;
optional
double
clipnorm
=
101
;
optional
double
clipvalue
=
102
;
optional
double
clipvalue
=
102
;
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录