机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit a74f5365
Authored Jan 06, 2017 by Yu Yang

Format code

Parent: 82bee14d

Showing 3 changed files with 64 additions and 61 deletions (+64, -61):
demo/traffic_prediction/dataprovider.py     +18  -13
demo/traffic_prediction/gen_result.py       +31  -31
demo/traffic_prediction/trainer_config.py   +15  -17
demo/traffic_prediction/dataprovider.py
@@ -18,6 +18,8 @@ import numpy as np
TERM_NUM = 24
FORECASTING_NUM = 25
LABEL_VALUE_NUM = 4


def initHook(settings, file_list, **kwargs):
    """
    Init hook is invoked before process data. It will set obj.slots and store data meta.

@@ -27,8 +29,8 @@ def initHook(settings, file_list, **kwargs):
    :param file_list: the meta file object, which passed from trainer_config.py,but unused in this function.
    :param kwargs: unused other arguments.
    """
    del kwargs  #unused

    settings.pool_size = sys.maxint
    #Use a time seires of the past as feature.
    #Dense_vector's expression form is [float,float,...,float]

@@ -38,40 +40,43 @@ def initHook(settings, file_list, **kwargs):
    for i in range(FORECASTING_NUM):
        settings.slots.append(integer_value(LABEL_VALUE_NUM))


@provider(
    init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
def process(settings, file_name):
    with open(file_name) as f:
        #abandon fields name
        f.next()
        for row_num, line in enumerate(f):
            speeds = map(int, line.rstrip('\r\n').split(",")[1:])
            # Get the max index.
            end_time = len(speeds)
            # Scanning and generating samples
            for i in range(TERM_NUM, end_time - FORECASTING_NUM):
                # For dense slot
                pre_spd = map(float, speeds[i - TERM_NUM:i])
                # Integer value need predicting, values start from 0, so every one minus 1.
                fol_spd = [i - 1 for i in speeds[i:i + FORECASTING_NUM]]
                # Predicting label is missing, abandon the sample.
                if -1 in fol_spd:
                    continue
                yield [pre_spd] + fol_spd


def predict_initHook(settings, file_list, **kwargs):
    settings.pool_size = sys.maxint
    settings.slots = [dense_vector(TERM_NUM)]


@provider(init_hook=predict_initHook, should_shuffle=False)
def process_predict(settings, file_name):
    with open(file_name) as f:
        #abandon fields name
        f.next()
        for row_num, line in enumerate(f):
            speeds = map(int, line.rstrip('\r\n').split(","))
            end_time = len(speeds)
            pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
            yield pre_spd
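Note: the sliding window in process() pairs the TERM_NUM most recent readings (a dense vector) with the next FORECASTING_NUM readings (integer labels shifted from 1..4 down to 0..3), skipping any window that contains a missing value. Below is a minimal standalone sketch of just that windowing logic, using made-up toy data and Python 3 syntax; the original provider targets Python 2, where map returns a list, and reads real CSV rows instead.

    # Standalone sketch of the windowing in process(); toy data, Python 3.
    TERM_NUM = 24          # past time steps used as the dense input feature
    FORECASTING_NUM = 25   # future time steps predicted as integer labels

    def make_samples(speeds):
        """Yield [past_floats] + future_labels, skipping windows with a missing label."""
        end_time = len(speeds)
        for i in range(TERM_NUM, end_time - FORECASTING_NUM):
            pre_spd = [float(s) for s in speeds[i - TERM_NUM:i]]      # dense feature: 24 past readings
            fol_spd = [s - 1 for s in speeds[i:i + FORECASTING_NUM]]  # labels: shift 1..4 down to 0..3
            if -1 in fol_spd:   # a raw value of 0 marks a missing reading; drop the window
                continue
            yield [pre_spd] + fol_spd

    speeds = [(t % 4) + 1 for t in range(60)]   # hypothetical row of speed levels in 1..4
    sample = next(make_samples(speeds))
    print(len(sample[0]), len(sample) - 1)      # -> 24 25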
demo/traffic_prediction/gen_result.py
res = []
with open('./rank-00000') as f:
    for line in f:
        pred = map(int, line.strip('\r\n;').split(";"))
        #raw prediction range from 0 to 3
        res.append([i + 1 for i in pred])

file_name = open('./data/pred.list').read().strip('\r\n')

FORECASTING_NUM = 24
header = [
    'id',
    '201604200805',
    '201604200810',
    '201604200815',
    '201604200820',
    '201604200825',
    '201604200830',
    '201604200835',
    '201604200840',
    '201604200845',
    '201604200850',
    '201604200855',
    '201604200900',
    '201604200905',
    '201604200910',
    '201604200915',
    '201604200920',
    '201604200925',
    '201604200930',
    '201604200935',
    '201604200940',
    '201604200945',
    '201604200950',
    '201604200955',
    '201604201000',
]

###################
## To CSV format ##
###################

@@ -43,5 +44,4 @@ with open(file_name) as f:
    for row_num, line in enumerate(f):
        fields = line.rstrip('\r\n').split(',')
        linkid = fields[0]
        print linkid + ',' + ','.join(map(str, res[row_num]))
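Note: gen_result.py shifts the raw class indices (0-3) back to speed levels (1-4) and prints one CSV row per link: the link id followed by 24 forecasts matching the five-minute timestamp header above. A small sketch of that row assembly follows, with made-up input values; the real script reads './rank-00000' and the test file named in './data/pred.list', and the link id shown here is hypothetical.

    # Sketch of the CSV row assembly in gen_result.py; inputs are made up.
    raw_line = "0;1;3;2"                            # hypothetical line from './rank-00000' (classes 0..3)
    pred = [int(x) for x in raw_line.strip('\r\n;').split(';')]
    row = [v + 1 for v in pred]                     # shift back to speed levels 1..4

    linkid = "4377906283422600514"                  # hypothetical link id from the test CSV's first column
    print(linkid + ',' + ','.join(map(str, row)))   # -> 4377906283422600514,1,2,4,3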
demo/traffic_prediction/trainer_config.py
@@ -2,26 +2,22 @@
#-*python-*-
from paddle.trainer_config_helpers import *
################################### DATA Configuration #############################################
is_predict = get_config_arg('is_predict', bool, False)
trn = './data/train.list' if not is_predict else None
tst = './data/test.list' if not is_predict else './data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(
    train_list=trn, test_list=tst, module="dataprovider", obj=process)
################################### Parameter Configuaration #######################################
TERM_NUM = 24
FORECASTING_NUM = 25
emb_size = 16
batch_size = 128 if not is_predict else 1
settings(
    batch_size=batch_size, learning_rate=1e-3, learning_method=RMSPropOptimizer())
################################### Algorithm Configuration ########################################
output_label = []

@@ -29,15 +25,17 @@ output_label = []
link_encode = data_layer(name='link_encode', size=TERM_NUM)
for i in xrange(FORECASTING_NUM):
    # Each task share same weight.
    link_param = ParamAttr(
        name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
    link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
    score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
    if is_predict:
        maxid = maxid_layer(score)
        output_label.append(maxid)
    else:
        # Multi-task training.
        label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
        cls = classification_cost(
            input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
        output_label.append(cls)
outputs(output_label)
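Note: the loop above attaches one classification head per forecast horizon, with every head sharing the '_link_vec.w' parameter; with FORECASTING_NUM = 25 and the '%dmin' pattern, the horizons run from 5 to 125 minutes. A quick plain-Python sketch of just that naming logic (no PaddlePaddle required) is shown below.

    # Layer and cost names generated by the multi-task loop in trainer_config.py.
    FORECASTING_NUM = 25

    label_names = ['label_%dmin' % ((i + 1) * 5) for i in range(FORECASTING_NUM)]
    cost_names = ['cost_%dmin' % ((i + 1) * 5) for i in range(FORECASTING_NUM)]

    print(label_names[0], label_names[-1])   # label_5min label_125min
    print(cost_names[0], cost_names[-1])     # cost_5min cost_125min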