Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
8610ba1c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8610ba1c
编写于
6月 05, 2017
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"remove get config proto"
上级
5b8a0c5d
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
150 addition
and
193 deletion
+150
-193
paddle/optimizer/Tensor.h
paddle/optimizer/Tensor.h
+12
-4
paddle/optimizer/adadelta_optimizer.cc
paddle/optimizer/adadelta_optimizer.cc
+8
-14
paddle/optimizer/adadelta_optimizer.h
paddle/optimizer/adadelta_optimizer.h
+11
-12
paddle/optimizer/adagrad_optimizer.cc
paddle/optimizer/adagrad_optimizer.cc
+8
-13
paddle/optimizer/adagrad_optimizer.h
paddle/optimizer/adagrad_optimizer.h
+6
-10
paddle/optimizer/adam_optimizer.cc
paddle/optimizer/adam_optimizer.cc
+6
-11
paddle/optimizer/adam_optimizer.h
paddle/optimizer/adam_optimizer.h
+7
-11
paddle/optimizer/lr_policy.h
paddle/optimizer/lr_policy.h
+15
-0
paddle/optimizer/optimizer.cc
paddle/optimizer/optimizer.cc
+11
-12
paddle/optimizer/parameter_optimizer.cc
paddle/optimizer/parameter_optimizer.cc
+44
-62
paddle/optimizer/parameter_optimizer.h
paddle/optimizer/parameter_optimizer.h
+4
-12
paddle/optimizer/sgd_optimizer.h
paddle/optimizer/sgd_optimizer.h
+13
-22
paddle/optimizer/sgd_optmizer.cc
paddle/optimizer/sgd_optmizer.cc
+5
-10
未找到文件。
paddle/optimizer/Tensor.h
浏览文件 @
8610ba1c
...
...
@@ -5,7 +5,6 @@
*/
#include <string.h>
#include "optimizer.h"
#include "paddle/math/BaseMatrix.h"
namespace
paddle
{
...
...
@@ -15,18 +14,27 @@ template <class T>
using
TensorBase
=
BaseMatrixT
<
T
>
;
template
<
class
T
>
class
Tensor
:
public
TensorBase
<
T
>
{
class
Tensor
T
:
public
TensorBase
<
T
>
{
public:
Tensor
(
T
*
data
,
int
size
)
:
TensorBase
<
T
>
(
1
,
size
,
0
,
data
,
false
,
false
)
{}
TensorT
(
T
*
data
,
int
size
)
:
TensorBase
<
T
>
(
1
,
size
,
0
,
data
,
false
,
false
)
{}
TensorT
(
const
TensorT
&
t
)
:
TensorBase
<
T
>
(
1
,
t
.
size
(),
0
,
t
.
get_buffer
(),
false
,
false
)
{}
TensorT
&
operator
=
(
const
TensorT
&
t
)
{
this
->
size_
=
t
.
size
();
this
->
data_
=
t
.
get_buffer
();
}
T
*
get_buffer
()
{
return
this
->
data_
;
}
T
&
operator
[](
const
int
idx
)
{
CHECK
(
idx
>=
0
&&
idx
<
this
->
width_
)
<<
"
out of index range"
;
CHECK
(
idx
>=
0
&&
idx
<
this
->
width_
)
<<
"out of index range"
;
return
this
->
data_
[
idx
];
}
// TODO: replace with tensorshape
size_t
size
()
const
{
return
this
->
width_
;
}
};
// TODO(zhihong): design problem of dynamic datatype, need to fix
typedef
TensorT
<
real
>
Tensor
;
}
// namespace optimizer
}
// namespace paddle
...
...
paddle/optimizer/adadelta_optimizer.cc
浏览文件 @
8610ba1c
...
...
@@ -4,19 +4,17 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
void
AdadeltaOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
void
AdadeltaOptimizer
::
set_weight
(
Tensor
*
p
)
{
size_t
size
=
p
->
size
();
T
*
gptr
=
new
T
[
size
];
accum_gradient
=
Tensor
<
T
>
(
gptr
,
size
);
T
*
dptr
=
new
T
[
size
];
accum_delta
=
Tensor
<
T
>
(
dptr
,
size
);
T
*
dptr_current
=
new
T
[
size
];
update_delta
=
Tensor
<
T
>
(
dptr_current
,
size
);
real
*
gptr
=
new
real
[
size
];
accum_gradient
=
Tensor
(
gptr
,
size
);
real
*
dptr
=
new
real
[
size
];
accum_delta
=
Tensor
(
dptr
,
size
);
real
*
dptr_current
=
new
real
[
size
];
update_delta
=
Tensor
(
dptr_current
,
size
);
}
template
<
class
T
>
void
AdadeltaOptimizer
<
T
>::
update
(
const
Tensor
<
T
>&
gradient
)
{
void
AdadeltaOptimizer
::
update
(
const
Tensor
&
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
...
...
@@ -33,9 +31,5 @@ void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
learning_rate
*
update_delta
[
i
]
+
learning_rate
*
decay
*
parameter_
[
i
];
}
}
template
class
AdadeltaOptimizer
<
float
>;
template
class
AdadeltaOptimizer
<
double
>;
}
// namespace optimizer
}
// namespace paddle
paddle/optimizer/adadelta_optimizer.h
浏览文件 @
8610ba1c
...
...
@@ -6,28 +6,27 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
class
AdadeltaOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdadeltaOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
<
T
>
::
parameter_
;
using
ParameterOptimizer
<
T
>
::
num_sample_passed
;
using
ParameterOptimizer
<
T
>
::
lr_policy
;
using
ParameterOptimizer
::
parameter_
;
using
ParameterOptimizer
::
num_sample_passed
;
using
ParameterOptimizer
::
lr_policy
;
AdadeltaOptimizer
(
double
rho
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
rho
(
rho
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
:
ParameterOptimizer
(
lr
),
rho
(
rho
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
~
AdadeltaOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
if
(
accum_delta
)
delete
accum_delta
;
if
(
update_delta
)
delete
update_delta
;
}
void
update
(
const
Tensor
<
T
>
&
gradient
);
void
set_weight
(
const
Tensor
<
T
>
*
p
);
T
*
get_weight
()
const
;
void
update
(
const
Tensor
&
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
<
T
>
*
accum_gradient
;
Tensor
<
T
>
*
accum_delta
;
Tensor
<
T
>
*
update_delta
;
Tensor
*
accum_gradient
;
Tensor
*
accum_delta
;
Tensor
*
update_delta
;
double
rho
;
double
epsilon
;
...
...
paddle/optimizer/adagrad_optimizer.cc
浏览文件 @
8610ba1c
...
...
@@ -2,21 +2,18 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
template
<
class
T
>
void
AdagradOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>*
p
)
{
void
AdagradOptimizer
::
set_weight
(
Tensor
*
p
)
{
size_t
size
=
p
->
width
();
T
*
gptr
=
new
T
[
size
];
accum_gradient
=
Tensor
<
T
>
(
gptr
,
size
);
T
*
dptr
=
new
T
[
size
];
accum_delta
=
Tensor
<
T
>
(
dtpr
,
size
);
T
*
dptr_current
=
new
T
[
size
];
update_delta
=
Tensor
<
T
>
(
dptr_current
,
size
);
real
*
gptr
=
new
real
[
size
];
accum_gradient
=
Tensor
(
gptr
,
size
);
real
*
dptr
=
new
real
[
size
];
accum_delta
=
Tensor
(
dtpr
,
size
);
real
*
dptr_current
=
new
real
[
size
];
update_delta
=
Tensor
(
dptr_current
,
size
);
}
template
<
class
T
>
void
AdagradOptimizer
<
T
>::
update
(
const
Tensor
<
T
>&
gradient
)
{
void
AdagradOptimizer
::
update
(
const
Tensor
&
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
();
for
(
size_t
i
=
0
;
i
<
parameter_
.
size
();
++
i
)
{
...
...
@@ -27,7 +24,5 @@ void AdagradOptimizer<T>::update(const Tensor<T>& gradient) {
}
}
template
class
AdagradOptimizer
<
float
>;
template
class
AdagradOptimizer
<
double
>;
}
// namespace optimizer
}
// namespace paddle
paddle/optimizer/adagrad_optimizer.h
浏览文件 @
8610ba1c
...
...
@@ -6,23 +6,19 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
class
AdagradOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdagradOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
<
T
>::
parameter_
;
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
AdagradOptimizer
(
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
:
ParameterOptimizer
(
lr
),
epsilon
(
epsilon
),
decay
(
decay
)
{}
~
AdagradOptimizer
()
{
if
(
accum_gradient
)
delete
accum_gradient
;
}
void
update
(
const
Tensor
<
T
>
&
gradient
);
void
set_weight
(
const
Tensor
<
T
>
*
p
);
T
*
get_weight
()
const
;
void
update
(
const
Tensor
&
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
<
T
>
*
accum_gradient
;
Tensor
*
accum_gradient
;
double
epsilon
;
double
decay
;
};
...
...
paddle/optimizer/adam_optimizer.cc
浏览文件 @
8610ba1c
...
...
@@ -3,17 +3,15 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
void
AdamOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
void
AdamOptimizer
::
set_weight
(
Tensor
*
p
)
{
size_t
size
=
p
->
width
();
T
*
mptr
=
new
T
[
size
];
momentums_
=
Tensor
<
T
>
(
mptr
,
size
);
T
*
vptr
=
new
T
[
size
];
velocitys_
=
Tensor
<
T
>
(
vtpr
,
size
);
real
*
mptr
=
new
real
[
size
];
momentums_
=
Tensor
(
mptr
,
size
);
real
*
vptr
=
new
real
[
size
];
velocitys_
=
Tensor
(
vtpr
,
size
);
}
template
<
class
T
>
void
AdamOptimizer
<
T
>::
update
(
const
Tensor
<
T
>
&
gradient
)
{
void
AdamOptimizer
::
update
(
const
Tensor
&
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
double
coef1
=
1.0
-
std
::
pow
(
beta_1
,
num_sample_passed
);
...
...
@@ -28,8 +26,5 @@ void AdamOptimizer<T>::update(const Tensor<T> &gradient) {
decay
*
parameter_
[
i
]);
}
}
template
class
AdamOptimizer
<
float
>;
template
class
AdamOptimizer
<
double
>;
}
// namespace optimizer
}
// namespace paddle
paddle/optimizer/adam_optimizer.h
浏览文件 @
8610ba1c
...
...
@@ -6,15 +6,11 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
class
AdamOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
AdamOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
<
T
>::
parameter_
;
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
AdamOptimizer
(
double
beta_1
,
double
beta_2
,
double
epsilon
,
double
decay
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
:
ParameterOptimizer
(
lr
),
beta_1
(
beta_1
),
beta_2
(
beta_2
),
epsilon
(
epsilon
),
...
...
@@ -23,13 +19,13 @@ public:
if
(
momentums_
)
delete
momentums_
;
if
(
velocitys_
)
delete
velocitys_
;
}
void
update
(
const
Tensor
<
T
>
&
gradient
);
void
set_weight
(
const
Tensor
<
T
>
*
p
);
T
*
get_weight
()
const
;
void
update
(
const
Tensor
&
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
<
T
>
*
momentums_
;
Tensor
<
T
>
*
velocitys_
;
Tensor
*
momentums_
;
Tensor
*
velocitys_
;
double
beta_1
;
double
beta_2
;
double
epsilon
;
...
...
paddle/optimizer/lr_policy.h
浏览文件 @
8610ba1c
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
#include <algorithm>
#include "OptimizerConfig.pb.h"
namespace
paddle
{
...
...
@@ -19,11 +20,25 @@ protected:
// constant learning rate policy
class
ConstLr
final
:
public
BaseLr
{
public:
ConstLr
(
double
lr
)
:
BaseLr
(
lr
){};
double
get_learning_rate
(
const
uint64_t
num_sample_passed
)
{
return
learning_rate
;
}
};
class
LinearLr
final
:
public
BaseLr
{
public:
LinearLr
(
double
lr
,
double
lr_decay_a
,
double
lr_decay_b
)
:
BaseLr
(
lr
),
lr_decay_a
(
lr_decay_a
),
lr_decay_b
(
lr_decay_b
)
{}
double
get_learning_rate
(
const
uint64_t
num_sample_passed
)
{
return
std
::
max
(
learning_rate
-
lr_decay_a
*
num_sample_passed
,
lr_decay_b
);
}
private:
double
lr_decay_a
;
double
lr_decay_b
;
};
}
// namespace optimizer
}
// namespace paddle
...
...
paddle/optimizer/optimizer.cc
浏览文件 @
8610ba1c
...
...
@@ -2,8 +2,9 @@
#include <string>
#include "parameter_optimizer.h"
using
namespace
paddle
::
optimizer
;
template
<
paddle_element_type
T
>
template
<
paddle_element_type
VALUE
>
struct
EnumToType
{};
template
<
class
T
>
...
...
@@ -26,17 +27,16 @@ MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE
(
uint64_t
,
PADDLE_ELEMENT_TYPE_UINT64
);
MATCH_ENUM_TYPE
(
float
,
PADDLE_ELEMENT_TYPE_FLOAT32
);
MATCH_ENUM_TYPE
(
double
,
PADDLE_ELEMENT_TYPE_FLOAT64
);
struct
paddle_optimizer
{
/*! \brief optmizer in C++ side */
paddle
::
optimizer
::
ParameterOptimizerBase
*
impl
;
struct
paddle_optimizer
{
paddle
::
optimizer
::
ParameterOptimizer
*
impl
;
};
paddle_optimizer
*
paddle_create_optimizer
(
const
unsigned
char
*
config_proto
,
int
config_proto_len
)
{
paddle_optimizer
*
optimizer
;
paddle_optimizer
*
optimizer
=
new
paddle_optimizer
;
std
::
string
config
(
config_proto
,
config_proto
+
config_proto_len
);
optimizer
->
impl
->
create
(
config_proto
);
optimizer
->
impl
=
ParameterOptimizer
::
create
(
config
);
return
optimizer
;
}
...
...
@@ -49,9 +49,9 @@ int paddle_update_parameter(paddle_optimizer* o,
const
paddle_element_type
data_type
,
const
void
*
grad_buffer
,
int
num_bytes
)
{
auto
type
=
EnumToType
<
data_type
>::
Type
;
paddle
::
Tensor
<
type
>
gradient
(
reinterpret_cast
<
type
*>
(
grad_buffer
),
num_bytes
);
// TOOD(zhihong): datatype not work. need to add the runtime datatype
auto
grad
=
reinterpret_cast
<
const
real
*>
(
grad_buffer
);
Tensor
gradient
(
const_cast
<
real
*>
(
grad
),
num_bytes
);
o
->
impl
->
update
(
gradient
);
return
PADDLE_SUCCESS
;
}
...
...
@@ -60,9 +60,8 @@ int paddle_optimizer_set_weights(paddle_optimizer* o,
const
paddle_element_type
data_type
,
void
*
param_buffer
,
int
num_bytes
)
{
auto
type
=
EnumToType
<
data_type
>::
Type
;
paddle
::
Tensor
<
type
>*
param
=
new
paddle
::
Tensor
<
type
>
(
reinterpret_cast
<
type
*>
(
param_buffer
),
num_bytes
);
// TOOD(zhihong): datatype not work. need to add the runtime datatype
Tensor
*
param
=
new
Tensor
(
reinterpret_cast
<
real
*>
(
param_buffer
),
num_bytes
);
o
->
impl
->
set_weight
(
param
);
return
PADDLE_SUCCESS
;
}
...
...
paddle/optimizer/parameter_optimizer.cc
浏览文件 @
8610ba1c
...
...
@@ -10,78 +10,60 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
ParameterOptimizer
<
T
>
*
ParameterOptimizer
<
T
>::
create
(
ParameterOptimizer
*
ParameterOptimizer
::
create
(
const
::
std
::
string
&
config_proto
)
{
paddle
::
OptimizerConfig
config
;
CHECK
(
config
.
ParseFromString
(
config_proto
)
==
0
)
<<
"error : optimizer config"
;
CHECK
(
config_valid
(
config
)
==
0
)
<<
"error : invalid optimizer config "
;
BaseLr
*
lr
=
nullptr
;
switch
(
config
.
lr_policy
())
{
case
"ConstLr"
:
lr
=
new
ConstLr
(
config
.
lr_config
().
learning_rate
());
break
;
}
ParameterOptimizer
<
T
>
*
opt
=
nullptr
;
switch
(
config
.
optimizer_name
())
{
case
"SGD"
:
opt
=
new
SGDOptimizer
<
T
>
(
config
.
sgd
().
momentum
(),
config
.
sgd
().
decay
(),
config
.
sgd
().
nesterov
(),
lr
);
break
;
case
"Adagrad"
:
opt
=
new
AdagradOptimizer
<
T
>
(
auto
select_lr_policy
=
[
=
](
const
OptimizerConfig
&
config
)
->
BaseLr
*
{
std
::
string
s
(
config
.
lr_policy
());
if
(
s
==
"ConstLr"
)
return
new
ConstLr
(
config
.
lr_config
().
learning_rate
());
if
(
s
==
"LinearLr"
)
return
new
LinearLr
(
config
.
lr_config
().
learning_rate
(),
config
.
lr_config
().
lr_decay_a
(),
config
.
lr_config
().
lr_decay_b
());
// default
return
new
ConstLr
(
config
.
lr_config
().
learning_rate
());
};
BaseLr
*
lr
=
select_lr_policy
(
config
);
auto
select_optimizer
=
[
=
](
const
OptimizerConfig
&
config
)
->
ParameterOptimizer
*
{
std
::
string
s
(
config
.
optimizer_name
());
if
(
s
==
"SGD"
)
{
return
new
SGDOptimizer
(
config
.
sgd
().
momentum
(),
config
.
sgd
().
decay
(),
config
.
sgd
().
nesterov
(),
lr
);
}
if
(
s
==
"Adadelta"
)
{
return
new
AdagradOptimizer
(
config
.
adagrad
().
epsilon
(),
config
.
adagrad
().
decay
(),
lr
);
break
;
case
"Adadelta"
:
opt
=
new
AdadeltaOptimizer
<
T
>
(
config
.
adadelta
().
rho
(),
config
.
adadelta
().
epsilon
(),
config
.
adadelta
().
decay
(),
lr
);
break
;
case
"Adam"
:
opt
=
new
AdamOptimizer
<
T
>
(
config
.
adam
().
beta_1
(),
config
.
adam
().
beta_2
(),
config
.
adam
().
epsilon
(),
config
.
adam
().
decay
(),
lr
);
break
;
}
return
opt
;
}
template
<
class
T
>
T
*
ParameterOptimizer
<
T
>::
get_weight
()
const
{
return
parameter
.
get
().
get_buffer
();
}
template
<
class
T
>
char
*
ParameterOptimizer
<
T
>::
get_config_proto
()
const
{
// set config dynamic value for save checkpoint
config_
.
lr_policy
().
set_learning_rate
(
lr_policy
->
get_learning_rate
(
num_sample_passed
));
config_
.
set_num_sample_passed
(
num_sample_passed
);
config_
.
set_iterations
(
iterations
);
return
config_
.
SerializeAsString
().
c_str
();
}
template
<
class
T
>
void
ParameterOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
parameter_
=
p
;
}
if
(
s
==
"Adagrad"
)
{
return
new
AdagradOptimizer
(
config
.
adagrad
().
epsilon
(),
config
.
adagrad
().
decay
(),
lr
);
}
if
(
s
==
"Adam"
)
{
return
new
AdadeltaOptimizer
(
config
.
adadelta
().
rho
(),
config
.
adadelta
().
epsilon
(),
config
.
adadelta
().
decay
(),
lr
);
}
// default
return
new
SGDOptimizer
(
config
.
sgd
().
momentum
(),
config
.
sgd
().
decay
(),
config
.
sgd
().
nesterov
(),
lr
);
};
return
select_optimizer
(
config
);
}
template
<
class
T
>
bool
ParameterOptimizer
<
T
>::
config_valid
(
const
::
std
::
string
&
config
)
const
{
// TODO(zhihong) : add more value checker, failed ASAP
return
true
;
real
*
ParameterOptimizer
::
get_weight
()
const
{
return
parameter_
->
get_buffer
();
}
template
class
ParameterOptimzier
<
float
>;
template
class
ParameterOptimzier
<
double
>;
void
ParameterOptimizer
::
set_weight
(
Tensor
*
p
)
{
parameter_
=
p
;
}
}
// namespace optimizer
}
// namespace paddle
paddle/optimizer/parameter_optimizer.h
浏览文件 @
8610ba1c
...
...
@@ -11,13 +11,6 @@
namespace
paddle
{
namespace
optimizer
{
class
ParameterOptimizerBase
{
private:
ParameterOptimizerBase
(
const
ParameterOptimizerBase
&
)
=
delete
;
ParameterOptimizerBase
&
operator
=
(
const
ParameterOptimizerBase
&
)
=
delete
;
};
template
<
class
T
>
class
ParameterOptimizer
{
public:
/**
...
...
@@ -31,14 +24,13 @@ public:
virtual
~
ParameterOptimizer
()
{
delete
parameter_
;
};
static
ParameterOptimizer
*
create
(
const
::
std
::
string
&
config_proto
);
virtual
void
update
(
const
Tensor
<
T
>
&
gradient
)
=
0
;
virtual
T
*
get_weight
()
const
;
virtual
void
set_weight
(
const
Tensor
<
T
>
*
parameter
);
virtual
void
update
(
const
Tensor
&
gradient
)
=
0
;
virtual
real
*
get_weight
()
const
;
virtual
void
set_weight
(
Tensor
*
parameter
);
public:
bool
config_valid
(
::
std
::
string
&
config
)
const
;
OptimizerConfig
config_
;
Tensor
<
T
>
*
parameter_
;
Tensor
*
parameter_
;
// learning rate policy
BaseLr
*
lr_policy
;
...
...
paddle/optimizer/sgd_optimizer.h
浏览文件 @
8610ba1c
...
...
@@ -6,31 +6,22 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
class
SGDOptimizer
:
public
ParameterOptimizer
<
T
>
{
class
SGDOptimizer
:
public
ParameterOptimizer
{
public:
using
ParameterOptimizer
<
T
>::
parameter_
;
using
ParameterOptimizer
<
T
>::
num_sample_passed
;
using
ParameterOptimizer
<
T
>::
lr_policy
;
SGDOptimizer
(
double
m
,
double
d
,
bool
n
,
double
learning_rate
,
uint64_t
num_sample_passed
,
BaseLr
*
lr
)
:
ParameterOptimizer
<
T
>
(
lr
),
momentum
(
m
),
decay
(
d
),
nesterov
(
n
)
{}
virtual
~
SGDOptimizer
()
{
// clear memory by Tensor library
delete
momentums_
;
}
void
update
(
const
Tensor
<
T
>&
gradient
);
void
set_weight
(
const
Tensor
<
T
>*
p
);
T
*
get_weight
()
const
;
using
ParameterOptimizer
::
parameter_
;
using
ParameterOptimizer
::
num_sample_passed
;
using
ParameterOptimizer
::
lr_policy
;
SGDOptimizer
(
double
m
,
double
d
,
bool
n
,
BaseLr
*
lr
)
:
ParameterOptimizer
(
lr
),
momentum
(
m
),
decay
(
d
),
nesterov
(
n
)
{}
virtual
~
SGDOptimizer
()
{
delete
momentums_
;
}
void
update
(
const
Tensor
&
gradient
);
void
set_weight
(
Tensor
*
p
);
real
*
get_weight
()
const
;
private:
Tensor
<
T
>
*
momentums_
;
Tensor
*
momentums_
;
double
momentum
;
double
decay
;
bool
nesterov
;
...
...
paddle/optimizer/sgd_optmizer.cc
浏览文件 @
8610ba1c
...
...
@@ -3,23 +3,21 @@
namespace
paddle
{
namespace
optimizer
{
template
<
class
T
>
void
SGDOptimizer
<
T
>::
set_weight
(
const
Tensor
<
T
>
*
p
)
{
void
SGDOptimizer
::
set_weight
(
Tensor
*
p
)
{
// ParameterOptimizer::set_weight(p);
size_t
size
=
p
->
size
();
// TODO: fix it with align aware allocator bind to Tensor
if
(
momentum
!=
0.0
)
{
T
*
ptr
=
new
T
[
size
];
momentums_
=
Tensor
<
T
>
(
ptr
,
size
);
real
*
ptr
=
new
real
[
size
];
momentums_
=
new
Tensor
(
ptr
,
size
);
}
}
template
<
class
T
>
void
SGDOptimizer
<
T
>::
update
(
const
Tensor
<
T
>
&
gradient
)
{
void
SGDOptimizer
::
update
(
const
Tensor
&
gradient
)
{
num_sample_passed
+=
1
;
double
learning_rate
=
lr_policy
->
get_learning_rate
(
num_sample_passed
);
double
velocity
=
0.0
;
Tensor
<
T
>
&
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
parameter_
->
size
();
++
i
)
{
if
(
momentum
==
0.0
)
{
velocity
=
-
learning_rate
*
gradient
[
i
]
-
learning_rate
*
decay
*
parameter_
[
i
];
...
...
@@ -36,8 +34,5 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
}
}
template
class
SGDOptimizer
<
float
>;
template
class
SGDOptimizer
<
double
>;
}
// namespace optimizer
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录