Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PARL
提交
3f22fd3e
P
PARL
项目概览
PaddlePaddle
/
PARL
通知
67
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
18
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PARL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
18
Issue
18
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3f22fd3e
编写于
3月 27, 2020
作者:
Z
zhoubo01
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename SamplingKey to SamplingInfo
上级
f46ad361
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
43 addition
and
34 deletion
+43
-34
deepes/README.md
deepes/README.md
+4
-4
deepes/demo/paddle/cartpole_solver_parallel.cc
deepes/demo/paddle/cartpole_solver_parallel.cc
+7
-7
deepes/demo/torch/cartpole_solver_parallel.cc
deepes/demo/torch/cartpole_solver_parallel.cc
+6
-6
deepes/include/paddle/es_agent.h
deepes/include/paddle/es_agent.h
+2
-2
deepes/include/torch/es_agent.h
deepes/include/torch/es_agent.h
+8
-8
deepes/src/paddle/es_agent.cc
deepes/src/paddle/es_agent.cc
+6
-6
deepes/src/proto/deepes.proto
deepes/src/proto/deepes.proto
+10
-1
未找到文件。
deepes/README.md
浏览文件 @
3f22fd3e
...
@@ -11,14 +11,14 @@ auto agent = ESAgent(config);
...
@@ -11,14 +11,14 @@ auto agent = ESAgent(config);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
sampling_agnet
=
agent
->
clone
();
// clone出一个sampling agent
auto
sampling_agnet
=
agent
->
clone
();
// clone出一个sampling agent
Sampling
Key
key
;
Sampling
Info
info
;
agent
->
add_noise
(
key
);
// 参数扰动,同时保存随机种子到key
中
agent
->
add_noise
(
info
);
// 参数扰动,同时保存随机种子到info
中
int
reward
=
evaluate
(
env
,
sampling_agent
);
//评估参数
int
reward
=
evaluate
(
env
,
sampling_agent
);
//评估参数
noisy_
keys
.
push_back
(
key
);
// 记录随机噪声对应种子
noisy_
info
.
push_back
(
info
);
// 记录随机噪声对应种子
noisy_rewards
.
push_back
(
reward
);
// 记录评估结果
noisy_rewards
.
push_back
(
reward
);
// 记录评估结果
}
}
//根据评估结果、随机种子更新参数,然后重复以上过程,直到收敛。
//根据评估结果、随机种子更新参数,然后重复以上过程,直到收敛。
agent
->
update
(
noisy_
keys
,
noisy_rewards
);
agent
->
update
(
noisy_
info
,
noisy_rewards
);
```
```
## 一键运行demo列表
## 一键运行demo列表
...
...
deepes/demo/paddle/cartpole_solver_parallel.cc
浏览文件 @
3f22fd3e
...
@@ -95,25 +95,25 @@ int main(int argc, char* argv[]) {
...
@@ -95,25 +95,25 @@ int main(int argc, char* argv[]) {
sampling_agents
.
push_back
(
agent
->
clone
());
sampling_agents
.
push_back
(
agent
->
clone
());
}
}
std
::
vector
<
Sampling
Key
>
noisy_keys
;
std
::
vector
<
Sampling
Info
>
noisy_info
;
std
::
vector
<
float
>
noisy_rewards
(
ITER
,
0.0
f
);
std
::
vector
<
float
>
noisy_rewards
(
ITER
,
0.0
f
);
noisy_
keys
.
resize
(
ITER
);
noisy_
info
.
resize
(
ITER
);
omp_set_num_threads
(
10
);
omp_set_num_threads
(
10
);
for
(
int
epoch
=
0
;
epoch
<
10
00
;
++
epoch
)
{
for
(
int
epoch
=
0
;
epoch
<
3
00
;
++
epoch
)
{
#pragma omp parallel for schedule(dynamic, 1)
#pragma omp parallel for schedule(dynamic, 1)
for
(
int
i
=
0
;
i
<
ITER
;
++
i
)
{
for
(
int
i
=
0
;
i
<
ITER
;
++
i
)
{
std
::
shared_ptr
<
ESAgent
>
sampling_agent
=
sampling_agents
[
i
];
std
::
shared_ptr
<
ESAgent
>
sampling_agent
=
sampling_agents
[
i
];
Sampling
Key
key
;
Sampling
Info
info
;
bool
success
=
sampling_agent
->
add_noise
(
key
);
bool
success
=
sampling_agent
->
add_noise
(
info
);
float
reward
=
evaluate
(
envs
[
i
],
sampling_agent
);
float
reward
=
evaluate
(
envs
[
i
],
sampling_agent
);
noisy_
keys
[
i
]
=
key
;
noisy_
info
[
i
]
=
info
;
noisy_rewards
[
i
]
=
reward
;
noisy_rewards
[
i
]
=
reward
;
}
}
// NOTE: all parameters of sampling_agents will be updated
// NOTE: all parameters of sampling_agents will be updated
bool
success
=
agent
->
update
(
noisy_
keys
,
noisy_rewards
);
bool
success
=
agent
->
update
(
noisy_
info
,
noisy_rewards
);
int
reward
=
evaluate
(
envs
[
0
],
agent
);
int
reward
=
evaluate
(
envs
[
0
],
agent
);
LOG
(
INFO
)
<<
"Epoch:"
<<
epoch
<<
" Reward: "
<<
reward
;
LOG
(
INFO
)
<<
"Epoch:"
<<
epoch
<<
" Reward: "
<<
reward
;
...
...
deepes/demo/torch/cartpole_solver_parallel.cc
浏览文件 @
3f22fd3e
...
@@ -59,23 +59,23 @@ int main(int argc, char* argv[]) {
...
@@ -59,23 +59,23 @@ int main(int argc, char* argv[]) {
sampling_agents
.
push_back
(
agent
->
clone
());
sampling_agents
.
push_back
(
agent
->
clone
());
}
}
std
::
vector
<
Sampling
Key
>
noisy_keys
;
std
::
vector
<
Sampling
Info
>
noisy_info
;
std
::
vector
<
float
>
noisy_rewards
(
ITER
,
0.0
f
);
std
::
vector
<
float
>
noisy_rewards
(
ITER
,
0.0
f
);
noisy_
keys
.
resize
(
ITER
);
noisy_
info
.
resize
(
ITER
);
for
(
int
epoch
=
0
;
epoch
<
1000
;
++
epoch
)
{
for
(
int
epoch
=
0
;
epoch
<
1000
;
++
epoch
)
{
#pragma omp parallel for schedule(dynamic, 1)
#pragma omp parallel for schedule(dynamic, 1)
for
(
int
i
=
0
;
i
<
ITER
;
++
i
)
{
for
(
int
i
=
0
;
i
<
ITER
;
++
i
)
{
auto
sampling_agent
=
sampling_agents
[
i
];
auto
sampling_agent
=
sampling_agents
[
i
];
Sampling
Key
key
;
Sampling
Info
info
;
bool
success
=
sampling_agent
->
add_noise
(
key
);
bool
success
=
sampling_agent
->
add_noise
(
info
);
float
reward
=
evaluate
(
envs
[
i
],
sampling_agent
);
float
reward
=
evaluate
(
envs
[
i
],
sampling_agent
);
noisy_
keys
[
i
]
=
key
;
noisy_
info
[
i
]
=
info
;
noisy_rewards
[
i
]
=
reward
;
noisy_rewards
[
i
]
=
reward
;
}
}
// Will also update parameters of sampling_agents
// Will also update parameters of sampling_agents
bool
success
=
agent
->
update
(
noisy_
keys
,
noisy_rewards
);
bool
success
=
agent
->
update
(
noisy_
info
,
noisy_rewards
);
// Use original agent to evalute (without noise).
// Use original agent to evalute (without noise).
int
reward
=
evaluate
(
envs
[
0
],
agent
);
int
reward
=
evaluate
(
envs
[
0
],
agent
);
...
...
deepes/include/paddle/es_agent.h
浏览文件 @
3f22fd3e
...
@@ -63,11 +63,11 @@ class ESAgent {
...
@@ -63,11 +63,11 @@ class ESAgent {
* Parameters of cloned agents will also be updated.
* Parameters of cloned agents will also be updated.
*/
*/
bool
update
(
bool
update
(
std
::
vector
<
Sampling
Key
>&
noisy_keys
,
std
::
vector
<
Sampling
Info
>&
noisy_info
,
std
::
vector
<
float
>&
noisy_rewards
);
std
::
vector
<
float
>&
noisy_rewards
);
// copied parameters = original parameters + noise
// copied parameters = original parameters + noise
bool
add_noise
(
Sampling
Key
&
sampling_key
);
bool
add_noise
(
Sampling
Info
&
sampling_info
);
/**
/**
* @brief Get paddle predict
* @brief Get paddle predict
...
...
deepes/include/torch/es_agent.h
浏览文件 @
3f22fd3e
...
@@ -98,7 +98,7 @@ public:
...
@@ -98,7 +98,7 @@ public:
* Only not cloned ESAgent can call `update` function.
* Only not cloned ESAgent can call `update` function.
* Parameters of cloned agents will also be updated.
* Parameters of cloned agents will also be updated.
*/
*/
bool
update
(
std
::
vector
<
Sampling
Key
>&
noisy_keys
,
std
::
vector
<
float
>&
noisy_rewards
)
{
bool
update
(
std
::
vector
<
Sampling
Info
>&
noisy_info
,
std
::
vector
<
float
>&
noisy_rewards
)
{
if
(
_is_sampling_agent
)
{
if
(
_is_sampling_agent
)
{
LOG
(
ERROR
)
<<
"[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."
;
LOG
(
ERROR
)
<<
"[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."
;
return
false
;
return
false
;
...
@@ -107,8 +107,8 @@ public:
...
@@ -107,8 +107,8 @@ public:
compute_centered_ranks
(
noisy_rewards
);
compute_centered_ranks
(
noisy_rewards
);
memset
(
_neg_gradients
,
0
,
_param_size
*
sizeof
(
float
));
memset
(
_neg_gradients
,
0
,
_param_size
*
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
noisy_
keys
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
noisy_
info
.
size
();
++
i
)
{
int
key
=
noisy_
keys
[
i
].
key
(
0
);
int
key
=
noisy_
info
[
i
].
key
(
0
);
float
reward
=
noisy_rewards
[
i
];
float
reward
=
noisy_rewards
[
i
];
bool
success
=
_sampling_method
->
resampling
(
key
,
_noise
,
_param_size
);
bool
success
=
_sampling_method
->
resampling
(
key
,
_noise
,
_param_size
);
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
...
@@ -116,7 +116,7 @@ public:
...
@@ -116,7 +116,7 @@ public:
}
}
}
}
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
_neg_gradients
[
j
]
/=
-
1.0
*
noisy_
keys
.
size
();
_neg_gradients
[
j
]
/=
-
1.0
*
noisy_
info
.
size
();
}
}
//update
//update
...
@@ -125,7 +125,7 @@ public:
...
@@ -125,7 +125,7 @@ public:
for
(
auto
&
param
:
params
)
{
for
(
auto
&
param
:
params
)
{
torch
::
Tensor
tensor
=
param
.
value
().
view
({
-
1
});
torch
::
Tensor
tensor
=
param
.
value
().
view
({
-
1
});
auto
tensor_a
=
tensor
.
accessor
<
float
,
1
>
();
auto
tensor_a
=
tensor
.
accessor
<
float
,
1
>
();
_optimizer
->
update
(
tensor_a
,
_neg_gradients
+
counter
,
tensor
.
size
(
0
),
param
.
key
());
_optimizer
->
update
(
tensor_a
,
_neg_gradients
+
counter
,
tensor
.
size
(
0
),
param
.
info
());
counter
+=
tensor
.
size
(
0
);
counter
+=
tensor
.
size
(
0
);
}
}
...
@@ -133,7 +133,7 @@ public:
...
@@ -133,7 +133,7 @@ public:
}
}
// copied parameters = original parameters + noise
// copied parameters = original parameters + noise
bool
add_noise
(
Sampling
Key
&
sampling_key
)
{
bool
add_noise
(
Sampling
Info
&
sampling_info
)
{
if
(
!
_is_sampling_agent
)
{
if
(
!
_is_sampling_agent
)
{
LOG
(
ERROR
)
<<
"[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."
;
LOG
(
ERROR
)
<<
"[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."
;
return
false
;
return
false
;
...
@@ -142,11 +142,11 @@ public:
...
@@ -142,11 +142,11 @@ public:
auto
sampling_params
=
_sampling_model
->
named_parameters
();
auto
sampling_params
=
_sampling_model
->
named_parameters
();
auto
params
=
_model
->
named_parameters
();
auto
params
=
_model
->
named_parameters
();
int
key
=
_sampling_method
->
sampling
(
_noise
,
_param_size
);
int
key
=
_sampling_method
->
sampling
(
_noise
,
_param_size
);
sampling_
key
.
add_key
(
key
);
sampling_
info
.
add_key
(
key
);
int64_t
counter
=
0
;
int64_t
counter
=
0
;
for
(
auto
&
param
:
sampling_params
)
{
for
(
auto
&
param
:
sampling_params
)
{
torch
::
Tensor
sampling_tensor
=
param
.
value
().
view
({
-
1
});
torch
::
Tensor
sampling_tensor
=
param
.
value
().
view
({
-
1
});
std
::
string
param_name
=
param
.
key
();
std
::
string
param_name
=
param
.
info
();
torch
::
Tensor
tensor
=
params
.
find
(
param_name
)
->
view
({
-
1
});
torch
::
Tensor
tensor
=
params
.
find
(
param_name
)
->
view
({
-
1
});
auto
sampling_tensor_a
=
sampling_tensor
.
accessor
<
float
,
1
>
();
auto
sampling_tensor_a
=
sampling_tensor
.
accessor
<
float
,
1
>
();
auto
tensor_a
=
tensor
.
accessor
<
float
,
1
>
();
auto
tensor_a
=
tensor
.
accessor
<
float
,
1
>
();
...
...
deepes/src/paddle/es_agent.cc
浏览文件 @
3f22fd3e
...
@@ -78,7 +78,7 @@ std::shared_ptr<ESAgent> ESAgent::clone() {
...
@@ -78,7 +78,7 @@ std::shared_ptr<ESAgent> ESAgent::clone() {
}
}
bool
ESAgent
::
update
(
bool
ESAgent
::
update
(
std
::
vector
<
Sampling
Key
>&
noisy_keys
,
std
::
vector
<
Sampling
Info
>&
noisy_info
,
std
::
vector
<
float
>&
noisy_rewards
)
{
std
::
vector
<
float
>&
noisy_rewards
)
{
if
(
_is_sampling_agent
)
{
if
(
_is_sampling_agent
)
{
LOG
(
ERROR
)
<<
"[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."
;
LOG
(
ERROR
)
<<
"[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."
;
...
@@ -88,8 +88,8 @@ bool ESAgent::update(
...
@@ -88,8 +88,8 @@ bool ESAgent::update(
compute_centered_ranks
(
noisy_rewards
);
compute_centered_ranks
(
noisy_rewards
);
memset
(
_neg_gradients
,
0
,
_param_size
*
sizeof
(
float
));
memset
(
_neg_gradients
,
0
,
_param_size
*
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
noisy_
keys
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
noisy_
info
.
size
();
++
i
)
{
int
key
=
noisy_
keys
[
i
].
key
(
0
);
int
key
=
noisy_
info
[
i
].
key
(
0
);
float
reward
=
noisy_rewards
[
i
];
float
reward
=
noisy_rewards
[
i
];
bool
success
=
_sampling_method
->
resampling
(
key
,
_noise
,
_param_size
);
bool
success
=
_sampling_method
->
resampling
(
key
,
_noise
,
_param_size
);
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
...
@@ -97,7 +97,7 @@ bool ESAgent::update(
...
@@ -97,7 +97,7 @@ bool ESAgent::update(
}
}
}
}
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
_param_size
;
++
j
)
{
_neg_gradients
[
j
]
/=
-
1.0
*
noisy_
keys
.
size
();
_neg_gradients
[
j
]
/=
-
1.0
*
noisy_
info
.
size
();
}
}
//update
//update
...
@@ -114,14 +114,14 @@ bool ESAgent::update(
...
@@ -114,14 +114,14 @@ bool ESAgent::update(
}
}
bool
ESAgent
::
add_noise
(
Sampling
Key
&
sampling_key
)
{
bool
ESAgent
::
add_noise
(
Sampling
Info
&
sampling_info
)
{
if
(
!
_is_sampling_agent
)
{
if
(
!
_is_sampling_agent
)
{
LOG
(
ERROR
)
<<
"[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."
;
LOG
(
ERROR
)
<<
"[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."
;
return
false
;
return
false
;
}
}
int
key
=
_sampling_method
->
sampling
(
_noise
,
_param_size
);
int
key
=
_sampling_method
->
sampling
(
_noise
,
_param_size
);
sampling_
key
.
add_key
(
key
);
sampling_
info
.
add_key
(
key
);
int64_t
counter
=
0
;
int64_t
counter
=
0
;
for
(
std
::
string
param_name
:
_param_names
)
{
for
(
std
::
string
param_name
:
_param_names
)
{
...
...
deepes/src/proto/deepes.proto
浏览文件 @
3f22fd3e
...
@@ -23,6 +23,8 @@ message DeepESConfig {
...
@@ -23,6 +23,8 @@ message DeepESConfig {
optional
GaussianSamplingConfig
gaussian_sampling
=
3
;
optional
GaussianSamplingConfig
gaussian_sampling
=
3
;
// Optimizer Configuration
// Optimizer Configuration
optional
OptimizerConfig
optimizer
=
4
;
optional
OptimizerConfig
optimizer
=
4
;
// AsyncESAgent Configuration
optional
AsyncESConfig
async_es
=
5
;
}
}
message
GaussianSamplingConfig
{
message
GaussianSamplingConfig
{
...
@@ -40,6 +42,13 @@ message OptimizerConfig{
...
@@ -40,6 +42,13 @@ message OptimizerConfig{
optional
float
epsilon
=
6
[
default
=
1e-8
];
optional
float
epsilon
=
6
[
default
=
1e-8
];
}
}
message
Sampling
Key
{
message
Sampling
Info
{
repeated
int32
key
=
1
;
repeated
int32
key
=
1
;
optional
int32
model_iter_id
=
2
;
}
message
AsyncESConfig
{
optional
string
model_warehouse
=
1
[
default
=
"./model_warehouse"
];
repeated
string
model_md5
=
2
;
optional
int32
max_to_keep
=
3
[
default
=
5
];
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录