Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PARL
提交
4b646fe0
P
PARL
项目概览
PaddlePaddle
/
PARL
通知
67
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
18
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PARL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
18
Issue
18
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4b646fe0
编写于
7月 30, 2020
作者:
B
Bo Zhou
提交者:
GitHub
7月 30, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix precision issue in the action_mapping function (#368)
* Update common.py * fix precision issue fix
#367
上级
3a27f407
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
26 addition
and
26 deletion
+26
-26
parl/core/fluid/plutils/common.py
parl/core/fluid/plutils/common.py
+1
-1
parl/utils/rl_utils.py
parl/utils/rl_utils.py
+22
-1
parl/utils/utils.py
parl/utils/utils.py
+3
-24
未找到文件。
parl/core/fluid/plutils/common.py
浏览文件 @
4b646fe0
...
@@ -37,7 +37,7 @@ def fetch_framework_var(attr_name):
...
@@ -37,7 +37,7 @@ def fetch_framework_var(attr_name):
core_var
=
scope
.
find_var
(
attr_name
)
core_var
=
scope
.
find_var
(
attr_name
)
if
core_var
==
None
:
if
core_var
==
None
:
raise
KeyError
(
raise
KeyError
(
"Unable to find the variable:{}. Synchronize param
s
ters before initialization or attr_name does not exist."
"Unable to find the variable:{}. Synchronize param
e
ters before initialization or attr_name does not exist."
.
format
(
attr_name
))
.
format
(
attr_name
))
shape
=
core_var
.
get_tensor
().
shape
()
shape
=
core_var
.
get_tensor
().
shape
()
framework_var
=
fluid
.
layers
.
create_parameter
(
framework_var
=
fluid
.
layers
.
create_parameter
(
...
...
parl/utils/rl_utils.py
浏览文件 @
4b646fe0
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
import
numpy
as
np
import
numpy
as
np
import
scipy.signal
import
scipy.signal
__all__
=
[
'calc_discount_sum_rewards'
,
'calc_gae'
]
__all__
=
[
'calc_discount_sum_rewards'
,
'calc_gae'
,
'action_mapping'
]
def
calc_discount_sum_rewards
(
rewards
,
gamma
):
def
calc_discount_sum_rewards
(
rewards
,
gamma
):
...
@@ -49,3 +49,24 @@ def calc_gae(rewards, values, next_value, gamma, lam):
...
@@ -49,3 +49,24 @@ def calc_gae(rewards, values, next_value, gamma, lam):
tds
=
rewards
+
gamma
*
np
.
append
(
values
[
1
:],
next_value
)
-
values
tds
=
rewards
+
gamma
*
np
.
append
(
values
[
1
:],
next_value
)
-
values
advantages
=
calc_discount_sum_rewards
(
tds
,
gamma
*
lam
)
advantages
=
calc_discount_sum_rewards
(
tds
,
gamma
*
lam
)
return
advantages
return
advantages
def
action_mapping
(
model_output_act
,
low_bound
,
high_bound
):
""" mapping action space [-1, 1] of model output
to new action space [low_bound, high_bound].
Args:
model_output_act: np.array, which value is in [-1, 1]
low_bound: float, low bound of env action space
high_bound: float, high bound of env action space
Returns:
action: np.array, which value is in [low_bound, high_bound]
"""
assert
np
.
all
(((
model_output_act
<=
1.0
+
1e-3
),
(
model_output_act
>=-
1.0
-
1e-3
))),
\
'the action should be in range [-1.0, 1.0]'
assert
high_bound
>
low_bound
action
=
low_bound
+
(
model_output_act
-
(
-
1.0
))
*
(
(
high_bound
-
low_bound
)
/
2.0
)
action
=
np
.
clip
(
action
,
low_bound
,
high_bound
)
return
action
parl/utils/utils.py
浏览文件 @
4b646fe0
...
@@ -18,9 +18,9 @@ import subprocess
...
@@ -18,9 +18,9 @@ import subprocess
import
numpy
as
np
import
numpy
as
np
__all__
=
[
__all__
=
[
'has_func'
,
'
action_mapping'
,
'to_str'
,
'to_byte'
,
'is_PY2'
,
'is_PY3
'
,
'has_func'
,
'
to_str'
,
'to_byte'
,
'is_PY2'
,
'is_PY3'
,
'MAX_INT32
'
,
'
MAX_INT32'
,
'_HAS_FLUID'
,
'_HAS_TORCH'
,
'_IS_WINDOWS'
,
'_IS_MAC
'
,
'
_HAS_FLUID'
,
'_HAS_TORCH'
,
'_IS_WINDOWS'
,
'_IS_MAC'
,
'kill_process
'
,
'
kill_process'
,
'
get_fluid_version'
'get_fluid_version'
]
]
...
@@ -37,27 +37,6 @@ def has_func(obj, fun):
...
@@ -37,27 +37,6 @@ def has_func(obj, fun):
return
callable
(
check_fun
)
return
callable
(
check_fun
)
def
action_mapping
(
model_output_act
,
low_bound
,
high_bound
):
""" mapping action space [-1, 1] of model output
to new action space [low_bound, high_bound].
Args:
model_output_act: np.array, which value is in [-1, 1]
low_bound: float, low bound of env action space
high_bound: float, high bound of env action space
Returns:
action: np.array, which value is in [low_bound, high_bound]
"""
assert
np
.
all
(((
model_output_act
<=
1.0
),
(
model_output_act
>=-
1.0
))),
\
'the action should be in range [-1.0, 1.0]'
assert
high_bound
>
low_bound
action
=
low_bound
+
(
model_output_act
-
(
-
1.0
))
*
(
(
high_bound
-
low_bound
)
/
2.0
)
action
=
np
.
clip
(
action
,
low_bound
,
high_bound
)
return
action
def
to_str
(
byte
):
def
to_str
(
byte
):
""" convert byte to string in pytohn2/3
""" convert byte to string in pytohn2/3
"""
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录