Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
be273ea9
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
be273ea9
编写于
10月 19, 2022
作者:
L
Li-fAngyU
提交者:
GitHub
10月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix build warning: [Wsign-compare] on linux (#46644)
上级
ddf317ed
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
244 addition
and
232 deletion
+244
-232
paddle/fluid/distributed/ps/table/common_graph_table.cc
paddle/fluid/distributed/ps/table/common_graph_table.cc
+85
-73
paddle/fluid/distributed/ps/table/memory_dense_table.cc
paddle/fluid/distributed/ps/table/memory_dense_table.cc
+22
-22
paddle/fluid/distributed/ps/table/memory_sparse_table.cc
paddle/fluid/distributed/ps/table/memory_sparse_table.cc
+92
-92
paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc
paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc
+45
-45
未找到文件。
paddle/fluid/distributed/ps/table/common_graph_table.cc
浏览文件 @
be273ea9
此差异已折叠。
点击以展开。
paddle/fluid/distributed/ps/table/memory_dense_table.cc
浏览文件 @
be273ea9
...
@@ -21,8 +21,8 @@ namespace distributed {
...
@@ -21,8 +21,8 @@ namespace distributed {
int
FLAGS_pslib_table_save_max_retry_dense
=
3
;
int
FLAGS_pslib_table_save_max_retry_dense
=
3
;
void
MemoryDenseTable
::
CreateInitializer
(
const
std
::
string
&
attr
,
void
MemoryDenseTable
::
CreateInitializer
(
const
std
::
string
&
attr
,
const
std
::
string
&
name
)
{
const
std
::
string
&
name
)
{
auto
slices
=
string
::
split_string
<
std
::
string
>
(
attr
,
"&"
);
auto
slices
=
string
::
split_string
<
std
::
string
>
(
attr
,
"&"
);
if
(
slices
[
0
]
==
"gaussian_random"
)
{
if
(
slices
[
0
]
==
"gaussian_random"
)
{
...
@@ -60,14 +60,14 @@ int32_t MemoryDenseTable::InitializeValue() {
...
@@ -60,14 +60,14 @@ int32_t MemoryDenseTable::InitializeValue() {
values_
.
resize
(
size
);
values_
.
resize
(
size
);
total_dim_
=
0
;
total_dim_
=
0
;
for
(
int
x
=
0
;
x
<
size
;
++
x
)
{
for
(
int
x
=
0
;
x
<
size
;
++
x
)
{
auto
&
varname
=
common
.
params
()[
x
];
auto
&
varname
=
common
.
params
()[
x
];
auto
&
dim
=
common
.
dims
()[
x
];
auto
&
dim
=
common
.
dims
()[
x
];
if
(
varname
==
"Param"
)
{
if
(
varname
==
"Param"
)
{
param_dim_
=
dim
;
param_dim_
=
dim
;
param_idx_
=
x
;
param_idx_
=
x
;
}
}
auto
&
initializer
=
common
.
initializers
()[
x
];
auto
&
initializer
=
common
.
initializers
()[
x
];
total_dim_
+=
dim
;
total_dim_
+=
dim
;
CreateInitializer
(
initializer
,
varname
);
CreateInitializer
(
initializer
,
varname
);
...
@@ -81,7 +81,7 @@ int32_t MemoryDenseTable::InitializeValue() {
...
@@ -81,7 +81,7 @@ int32_t MemoryDenseTable::InitializeValue() {
fixed_len_params_dim_
=
0
;
fixed_len_params_dim_
=
0
;
for
(
int
x
=
0
;
x
<
size
;
++
x
)
{
for
(
int
x
=
0
;
x
<
size
;
++
x
)
{
auto
&
dim
=
common
.
dims
()[
x
];
auto
&
dim
=
common
.
dims
()[
x
];
if
(
static_cast
<
int
>
(
dim
)
!=
param_dim_
)
{
if
(
static_cast
<
int
>
(
dim
)
!=
param_dim_
)
{
fixed_len_params_dim_
+=
dim
;
fixed_len_params_dim_
+=
dim
;
}
else
{
}
else
{
...
@@ -124,19 +124,19 @@ int32_t MemoryDenseTable::InitializeOptimizer() {
...
@@ -124,19 +124,19 @@ int32_t MemoryDenseTable::InitializeOptimizer() {
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
SetGlobalLR
(
float
*
lr
)
{
int32_t
MemoryDenseTable
::
SetGlobalLR
(
float
*
lr
)
{
_global_lr
=
lr
;
_global_lr
=
lr
;
optimizer_
->
SetGlobalLR
(
_global_lr
);
optimizer_
->
SetGlobalLR
(
_global_lr
);
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
Pull
(
TableContext
&
context
)
{
int32_t
MemoryDenseTable
::
Pull
(
TableContext
&
context
)
{
CHECK
(
context
.
value_type
==
Dense
);
CHECK
(
context
.
value_type
==
Dense
);
float
*
pull_values
=
context
.
pull_context
.
values
;
float
*
pull_values
=
context
.
pull_context
.
values
;
return
PullDense
(
pull_values
,
context
.
num
);
return
PullDense
(
pull_values
,
context
.
num
);
}
}
int32_t
MemoryDenseTable
::
Push
(
TableContext
&
context
)
{
int32_t
MemoryDenseTable
::
Push
(
TableContext
&
context
)
{
CHECK
(
context
.
value_type
==
Dense
);
CHECK
(
context
.
value_type
==
Dense
);
if
(
context
.
push_context
.
values
!=
nullptr
)
{
if
(
context
.
push_context
.
values
!=
nullptr
)
{
if
(
!
context
.
push_context
.
is_param
)
{
if
(
!
context
.
push_context
.
is_param
)
{
...
@@ -148,13 +148,13 @@ int32_t MemoryDenseTable::Push(TableContext& context) {
...
@@ -148,13 +148,13 @@ int32_t MemoryDenseTable::Push(TableContext& context) {
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
PullDense
(
float
*
pull_values
,
size_t
num
)
{
int32_t
MemoryDenseTable
::
PullDense
(
float
*
pull_values
,
size_t
num
)
{
std
::
copy
(
std
::
copy
(
values_
[
param_idx_
].
begin
(),
values_
[
param_idx_
].
end
(),
pull_values
);
values_
[
param_idx_
].
begin
(),
values_
[
param_idx_
].
end
(),
pull_values
);
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
PushDenseParam
(
const
float
*
values
,
size_t
num
)
{
int32_t
MemoryDenseTable
::
PushDenseParam
(
const
float
*
values
,
size_t
num
)
{
PADDLE_ENFORCE_GE
(
PADDLE_ENFORCE_GE
(
num
,
num
,
param_dim_
,
param_dim_
,
...
@@ -171,7 +171,7 @@ int32_t MemoryDenseTable::Pour() {
...
@@ -171,7 +171,7 @@ int32_t MemoryDenseTable::Pour() {
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
PushDense
(
const
float
*
values
,
size_t
num
)
{
int32_t
MemoryDenseTable
::
PushDense
(
const
float
*
values
,
size_t
num
)
{
if
(
sync
)
{
if
(
sync
)
{
std
::
future
<
int
>
task
=
std
::
future
<
int
>
task
=
_shards_task_pool
[
0
]
->
enqueue
([
this
,
&
values
]()
->
int
{
_shards_task_pool
[
0
]
->
enqueue
([
this
,
&
values
]()
->
int
{
...
@@ -185,7 +185,7 @@ int32_t MemoryDenseTable::PushDense(const float* values, size_t num) {
...
@@ -185,7 +185,7 @@ int32_t MemoryDenseTable::PushDense(const float* values, size_t num) {
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
_PushDense
(
const
float
*
values
,
size_t
num
)
{
int32_t
MemoryDenseTable
::
_PushDense
(
const
float
*
values
,
size_t
num
)
{
PADDLE_ENFORCE_GE
(
PADDLE_ENFORCE_GE
(
num
,
num
,
param_dim_
,
param_dim_
,
...
@@ -212,8 +212,8 @@ int32_t MemoryDenseTable::_PushDense(const float* values, size_t num) {
...
@@ -212,8 +212,8 @@ int32_t MemoryDenseTable::_PushDense(const float* values, size_t num) {
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
Load
(
const
std
::
string
&
path
,
int32_t
MemoryDenseTable
::
Load
(
const
std
::
string
&
path
,
const
std
::
string
&
param
)
{
const
std
::
string
&
param
)
{
if
(
param_dim_
<=
0
)
{
if
(
param_dim_
<=
0
)
{
return
0
;
return
0
;
}
}
...
@@ -249,7 +249,7 @@ int32_t MemoryDenseTable::Load(const std::string& path,
...
@@ -249,7 +249,7 @@ int32_t MemoryDenseTable::Load(const std::string& path,
try
{
try
{
int
dim_idx
=
0
;
int
dim_idx
=
0
;
float
data_buffer
[
5
];
float
data_buffer
[
5
];
float
*
data_buff_ptr
=
data_buffer
;
float
*
data_buff_ptr
=
data_buffer
;
std
::
string
line_data
;
std
::
string
line_data
;
auto
common
=
_config
.
common
();
auto
common
=
_config
.
common
();
...
@@ -319,8 +319,8 @@ int32_t MemoryDenseTable::Load(const std::string& path,
...
@@ -319,8 +319,8 @@ int32_t MemoryDenseTable::Load(const std::string& path,
return
0
;
return
0
;
}
}
int32_t
MemoryDenseTable
::
Save
(
const
std
::
string
&
path
,
int32_t
MemoryDenseTable
::
Save
(
const
std
::
string
&
path
,
const
std
::
string
&
param
)
{
const
std
::
string
&
param
)
{
int
save_param
=
atoi
(
param
.
c_str
());
int
save_param
=
atoi
(
param
.
c_str
());
uint32_t
feasign_size
;
uint32_t
feasign_size
;
VLOG
(
0
)
<<
"MemoryDenseTable::save path "
<<
path
;
VLOG
(
0
)
<<
"MemoryDenseTable::save path "
<<
path
;
...
@@ -353,7 +353,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
...
@@ -353,7 +353,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
os
.
clear
();
os
.
clear
();
os
.
str
(
""
);
os
.
str
(
""
);
os
<<
values_
[
param_col_ids_
[
0
]][
y
]
<<
" 0"
;
os
<<
values_
[
param_col_ids_
[
0
]][
y
]
<<
" 0"
;
for
(
in
t
x
=
2
;
x
<
param_col_ids_
.
size
();
++
x
)
{
for
(
size_
t
x
=
2
;
x
<
param_col_ids_
.
size
();
++
x
)
{
os
<<
" "
;
os
<<
" "
;
os
<<
values_
[
param_col_ids_
[
x
]][
y
];
os
<<
values_
[
param_col_ids_
[
x
]][
y
];
}
}
...
@@ -365,7 +365,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
...
@@ -365,7 +365,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
os
.
clear
();
os
.
clear
();
os
.
str
(
""
);
os
.
str
(
""
);
os
<<
values_
[
param_col_ids_
[
0
]][
y
];
os
<<
values_
[
param_col_ids_
[
0
]][
y
];
for
(
in
t
x
=
1
;
x
<
param_col_ids_
.
size
();
++
x
)
{
for
(
size_
t
x
=
1
;
x
<
param_col_ids_
.
size
();
++
x
)
{
os
<<
" "
;
os
<<
" "
;
os
<<
values_
[
param_col_ids_
[
x
]][
y
];
os
<<
values_
[
param_col_ids_
[
x
]][
y
];
}
}
...
@@ -383,7 +383,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
...
@@ -383,7 +383,7 @@ int32_t MemoryDenseTable::Save(const std::string& path,
auto
write_channel
=
auto
write_channel
=
_afs_client
.
open_w
(
channel_config
,
1024
*
1024
*
40
,
&
err_no
);
_afs_client
.
open_w
(
channel_config
,
1024
*
1024
*
40
,
&
err_no
);
for
(
auto
&
t
:
result_buffer_param
)
{
for
(
auto
&
t
:
result_buffer_param
)
{
if
(
0
!=
write_channel
->
write_line
(
t
))
{
if
(
0
!=
write_channel
->
write_line
(
t
))
{
++
retry_num
;
++
retry_num
;
is_write_failed
=
true
;
is_write_failed
=
true
;
...
...
paddle/fluid/distributed/ps/table/memory_sparse_table.cc
浏览文件 @
be273ea9
此差异已折叠。
点击以展开。
paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc
浏览文件 @
be273ea9
...
@@ -23,7 +23,7 @@ DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient");
...
@@ -23,7 +23,7 @@ DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient");
namespace
paddle
{
namespace
paddle
{
namespace
distributed
{
namespace
distributed
{
void
SparseNaiveSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
void
SparseNaiveSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
size_t
emb_dim
)
{
_embedding_dim
=
emb_dim
;
_embedding_dim
=
emb_dim
;
auto
naive_param
=
param
.
naive
();
auto
naive_param
=
param
.
naive
();
...
@@ -41,9 +41,9 @@ void SparseNaiveSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
...
@@ -41,9 +41,9 @@ void SparseNaiveSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
}
}
}
}
void
SparseNaiveSGDRule
::
UpdateValueWork
(
float
*
w
,
void
SparseNaiveSGDRule
::
UpdateValueWork
(
float
*
w
,
float
*
sgd
,
float
*
sgd
,
const
float
*
push_value
,
const
float
*
push_value
,
float
scale
)
{
float
scale
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
w
[
i
]
-=
learning_rate_
*
push_value
[
i
];
w
[
i
]
-=
learning_rate_
*
push_value
[
i
];
...
@@ -51,8 +51,8 @@ void SparseNaiveSGDRule::UpdateValueWork(float* w,
...
@@ -51,8 +51,8 @@ void SparseNaiveSGDRule::UpdateValueWork(float* w,
}
}
}
}
void
SparseNaiveSGDRule
::
InitValueWork
(
float
*
value
,
void
SparseNaiveSGDRule
::
InitValueWork
(
float
*
value
,
float
*
sgd
,
float
*
sgd
,
bool
zero_init
)
{
bool
zero_init
)
{
if
(
zero_init
)
{
if
(
zero_init
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
...
@@ -68,7 +68,7 @@ void SparseNaiveSGDRule::InitValueWork(float* value,
...
@@ -68,7 +68,7 @@ void SparseNaiveSGDRule::InitValueWork(float* value,
}
}
}
}
}
}
void
SparseAdaGradSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
void
SparseAdaGradSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
size_t
emb_dim
)
{
_embedding_dim
=
emb_dim
;
_embedding_dim
=
emb_dim
;
auto
adagrad_param
=
param
.
adagrad
();
auto
adagrad_param
=
param
.
adagrad
();
...
@@ -88,11 +88,11 @@ void SparseAdaGradSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
...
@@ -88,11 +88,11 @@ void SparseAdaGradSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
}
}
}
}
void
SparseAdaGradSGDRule
::
UpdateValueWork
(
float
*
w
,
void
SparseAdaGradSGDRule
::
UpdateValueWork
(
float
*
w
,
float
*
sgd
,
float
*
sgd
,
const
float
*
grad
,
const
float
*
grad
,
float
scale
)
{
float
scale
)
{
float
&
g2sum
=
sgd
[
G2SumIndex
()];
float
&
g2sum
=
sgd
[
G2SumIndex
()];
double
add_g2sum
=
0
;
double
add_g2sum
=
0
;
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
...
@@ -106,8 +106,8 @@ void SparseAdaGradSGDRule::UpdateValueWork(float* w,
...
@@ -106,8 +106,8 @@ void SparseAdaGradSGDRule::UpdateValueWork(float* w,
g2sum
+=
add_g2sum
/
_embedding_dim
;
g2sum
+=
add_g2sum
/
_embedding_dim
;
}
}
void
SparseAdaGradSGDRule
::
InitValueWork
(
float
*
value
,
void
SparseAdaGradSGDRule
::
InitValueWork
(
float
*
value
,
float
*
sgd
,
float
*
sgd
,
bool
zero_init
)
{
bool
zero_init
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
if
(
zero_init
)
{
if
(
zero_init
)
{
...
@@ -125,7 +125,7 @@ void SparseAdaGradSGDRule::InitValueWork(float* value,
...
@@ -125,7 +125,7 @@ void SparseAdaGradSGDRule::InitValueWork(float* value,
sgd
[
G2SumIndex
()]
=
0
;
sgd
[
G2SumIndex
()]
=
0
;
}
}
void
StdAdaGradSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
void
StdAdaGradSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
size_t
emb_dim
)
{
_embedding_dim
=
emb_dim
;
_embedding_dim
=
emb_dim
;
auto
adagrad_param
=
param
.
adagrad
();
auto
adagrad_param
=
param
.
adagrad
();
...
@@ -145,12 +145,12 @@ void StdAdaGradSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
...
@@ -145,12 +145,12 @@ void StdAdaGradSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
}
}
}
}
void
StdAdaGradSGDRule
::
UpdateValueWork
(
float
*
w
,
void
StdAdaGradSGDRule
::
UpdateValueWork
(
float
*
w
,
float
*
sgd
,
float
*
sgd
,
const
float
*
grad
,
const
float
*
grad
,
float
scale
)
{
float
scale
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
float
&
g2sum
=
sgd
[
G2SumIndex
()
+
i
];
float
&
g2sum
=
sgd
[
G2SumIndex
()
+
i
];
double
scaled_grad
=
grad
[
i
]
/
scale
;
double
scaled_grad
=
grad
[
i
]
/
scale
;
w
[
i
]
-=
learning_rate_
*
scaled_grad
*
w
[
i
]
-=
learning_rate_
*
scaled_grad
*
sqrt
(
_initial_g2sum
/
(
_initial_g2sum
+
g2sum
));
sqrt
(
_initial_g2sum
/
(
_initial_g2sum
+
g2sum
));
...
@@ -159,8 +159,8 @@ void StdAdaGradSGDRule::UpdateValueWork(float* w,
...
@@ -159,8 +159,8 @@ void StdAdaGradSGDRule::UpdateValueWork(float* w,
}
}
}
}
void
StdAdaGradSGDRule
::
InitValueWork
(
float
*
value
,
void
StdAdaGradSGDRule
::
InitValueWork
(
float
*
value
,
float
*
sgd
,
float
*
sgd
,
bool
zero_init
)
{
bool
zero_init
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
if
(
zero_init
)
{
if
(
zero_init
)
{
...
@@ -178,7 +178,7 @@ void StdAdaGradSGDRule::InitValueWork(float* value,
...
@@ -178,7 +178,7 @@ void StdAdaGradSGDRule::InitValueWork(float* value,
}
}
}
}
void
SparseAdamSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
void
SparseAdamSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
size_t
emb_dim
)
{
_embedding_dim
=
emb_dim
;
_embedding_dim
=
emb_dim
;
auto
adam_param
=
param
.
adam
();
auto
adam_param
=
param
.
adam
();
...
@@ -199,15 +199,15 @@ void SparseAdamSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
...
@@ -199,15 +199,15 @@ void SparseAdamSGDRule::LoadConfig(const SparseCommonSGDRuleParameter& param,
}
}
}
}
void
SparseAdamSGDRule
::
UpdateValueWork
(
float
*
w
,
void
SparseAdamSGDRule
::
UpdateValueWork
(
float
*
w
,
float
*
sgd
,
float
*
sgd
,
const
float
*
grad
,
const
float
*
grad
,
float
scale
)
{
float
scale
)
{
float
*
gsum
=
sgd
+
GSumIndex
();
float
*
gsum
=
sgd
+
GSumIndex
();
float
*
g2sum
=
sgd
+
G2SumIndex
();
float
*
g2sum
=
sgd
+
G2SumIndex
();
float
*
beta1_pow
=
sgd
+
Beta1PowIndex
();
float
*
beta1_pow
=
sgd
+
Beta1PowIndex
();
float
*
beta2_pow
=
sgd
+
Beta2PowIndex
();
float
*
beta2_pow
=
sgd
+
Beta2PowIndex
();
const
float
*
g
=
grad
;
const
float
*
g
=
grad
;
float
lr
=
learning_rate_
;
float
lr
=
learning_rate_
;
float
beta1_pow_
=
*
beta1_pow
;
float
beta1_pow_
=
*
beta1_pow
;
...
@@ -227,8 +227,8 @@ void SparseAdamSGDRule::UpdateValueWork(float* w,
...
@@ -227,8 +227,8 @@ void SparseAdamSGDRule::UpdateValueWork(float* w,
(
*
beta2_pow
)
*=
_beta2_decay_rate
;
(
*
beta2_pow
)
*=
_beta2_decay_rate
;
}
}
void
SparseAdamSGDRule
::
InitValueWork
(
float
*
value
,
void
SparseAdamSGDRule
::
InitValueWork
(
float
*
value
,
float
*
sgd
,
float
*
sgd
,
bool
zero_init
)
{
bool
zero_init
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
if
(
zero_init
)
{
if
(
zero_init
)
{
...
@@ -253,7 +253,7 @@ void SparseAdamSGDRule::InitValueWork(float* value,
...
@@ -253,7 +253,7 @@ void SparseAdamSGDRule::InitValueWork(float* value,
}
}
void
SparseSharedAdamSGDRule
::
LoadConfig
(
void
SparseSharedAdamSGDRule
::
LoadConfig
(
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
const
SparseCommonSGDRuleParameter
&
param
,
size_t
emb_dim
)
{
_embedding_dim
=
emb_dim
;
_embedding_dim
=
emb_dim
;
auto
adam_param
=
param
.
adam
();
auto
adam_param
=
param
.
adam
();
learning_rate_
=
adam_param
.
learning_rate
();
learning_rate_
=
adam_param
.
learning_rate
();
...
@@ -273,15 +273,15 @@ void SparseSharedAdamSGDRule::LoadConfig(
...
@@ -273,15 +273,15 @@ void SparseSharedAdamSGDRule::LoadConfig(
}
}
}
}
void
SparseSharedAdamSGDRule
::
UpdateValueWork
(
float
*
w
,
void
SparseSharedAdamSGDRule
::
UpdateValueWork
(
float
*
w
,
float
*
sgd
,
float
*
sgd
,
const
float
*
grad
,
const
float
*
grad
,
float
scale
)
{
float
scale
)
{
float
*
gsum
=
sgd
+
GSumIndex
();
float
*
gsum
=
sgd
+
GSumIndex
();
float
*
g2sum
=
sgd
+
G2SumIndex
();
float
*
g2sum
=
sgd
+
G2SumIndex
();
float
*
beta1_pow
=
sgd
+
Beta1PowIndex
();
float
*
beta1_pow
=
sgd
+
Beta1PowIndex
();
float
*
beta2_pow
=
sgd
+
Beta2PowIndex
();
float
*
beta2_pow
=
sgd
+
Beta2PowIndex
();
const
float
*
g
=
grad
;
const
float
*
g
=
grad
;
float
lr
=
learning_rate_
;
float
lr
=
learning_rate_
;
float
beta1_pow_
=
*
beta1_pow
;
float
beta1_pow_
=
*
beta1_pow
;
...
@@ -292,7 +292,7 @@ void SparseSharedAdamSGDRule::UpdateValueWork(float* w,
...
@@ -292,7 +292,7 @@ void SparseSharedAdamSGDRule::UpdateValueWork(float* w,
lr
*=
sqrt
(
1
-
beta2_pow_
)
/
(
1
-
beta1_pow_
);
lr
*=
sqrt
(
1
-
beta2_pow_
)
/
(
1
-
beta1_pow_
);
double
sum_gsum
=
0.0
;
double
sum_gsum
=
0.0
;
double
sum_g2sum
=
0.0
;
double
sum_g2sum
=
0.0
;
for
(
in
t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
for
(
size_
t
i
=
0
;
i
<
_embedding_dim
;
i
++
)
{
// Calculation
// Calculation
double
new_gsum
=
double
new_gsum
=
_beta1_decay_rate
*
gsum_
+
(
1
-
_beta1_decay_rate
)
*
g
[
i
];
_beta1_decay_rate
*
gsum_
+
(
1
-
_beta1_decay_rate
)
*
g
[
i
];
...
@@ -310,10 +310,10 @@ void SparseSharedAdamSGDRule::UpdateValueWork(float* w,
...
@@ -310,10 +310,10 @@ void SparseSharedAdamSGDRule::UpdateValueWork(float* w,
(
*
beta2_pow
)
*=
_beta2_decay_rate
;
(
*
beta2_pow
)
*=
_beta2_decay_rate
;
}
}
void
SparseSharedAdamSGDRule
::
InitValueWork
(
float
*
value
,
void
SparseSharedAdamSGDRule
::
InitValueWork
(
float
*
value
,
float
*
sgd
,
float
*
sgd
,
bool
zero_init
)
{
bool
zero_init
)
{
for
(
in
t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
_embedding_dim
;
++
i
)
{
if
(
zero_init
)
{
if
(
zero_init
)
{
value
[
i
]
=
0.0
;
value
[
i
]
=
0.0
;
BoundValue
(
value
[
i
]);
BoundValue
(
value
[
i
]);
...
@@ -327,7 +327,7 @@ void SparseSharedAdamSGDRule::InitValueWork(float* value,
...
@@ -327,7 +327,7 @@ void SparseSharedAdamSGDRule::InitValueWork(float* value,
}
}
}
}
// init rule gsum and g2sum
// init rule gsum and g2sum
for
(
in
t
i
=
GSumIndex
();
i
<
Beta1PowIndex
();
i
++
)
{
for
(
size_
t
i
=
GSumIndex
();
i
<
Beta1PowIndex
();
i
++
)
{
sgd
[
i
]
=
0.0
;
sgd
[
i
]
=
0.0
;
}
}
// init beta1_pow and beta2_pow
// init beta1_pow and beta2_pow
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录