Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
411e2348
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
411e2348
编写于
1月 06, 2017
作者:
C
chengxingyi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
A traffic demo for ASC17
上级
5c0178b0
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
268 addition
and
0 deletion
+268
-0
demo/traffic_prediction/README
demo/traffic_prediction/README
+7
-0
demo/traffic_prediction/data/get_data.sh
demo/traffic_prediction/data/get_data.sh
+34
-0
demo/traffic_prediction/dataprovider.py
demo/traffic_prediction/dataprovider.py
+77
-0
demo/traffic_prediction/gen_result.py
demo/traffic_prediction/gen_result.py
+47
-0
demo/traffic_prediction/predict.sh
demo/traffic_prediction/predict.sh
+30
-0
demo/traffic_prediction/train.sh
demo/traffic_prediction/train.sh
+30
-0
demo/traffic_prediction/trainer_config.py
demo/traffic_prediction/trainer_config.py
+43
-0
未找到文件。
demo/traffic_prediction/README
0 → 100644
浏览文件 @
411e2348
Run the demo with the following commands:
cd ./data
sh get_data.sh
cd ..
sh train.sh
sh predict.sh
demo/traffic_prediction/data/get_data.sh
0 → 100755
浏览文件 @
411e2348
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command and echo every command as it runs.
set -e
set -x

# Work from the directory that holds this script, wherever it is launched from.
DIR="$(cd "$(dirname "$0")"; pwd -P)"
cd "$DIR"

#download the dataset
echo "Downloading traffic data..."
# -O pins the output name: without it a second run saves the download as
# traffic_data.tar.gz.1 and tar below would unpack the stale archive.
wget -O traffic_data.tar.gz http://paddlepaddle.bj.bcebos.com/demo/traffic/traffic_data.tar.gz

#extract package
echo "Unzipping..."
tar -zxvf traffic_data.tar.gz

# Each list names the single CSV file. Overwrite (>) instead of append (>>)
# so that running this script again does not leave duplicate entries.
echo "data/speeds.csv" > train.list
echo "data/speeds.csv" > test.list
echo "data/speeds.csv" > pred.list

echo "Done."
demo/traffic_prediction/dataprovider.py
0 → 100644
浏览文件 @
411e2348
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer.PyDataProvider2
import
*
import
sys
import
numpy
as
np
# Length of the history window: how many past readings form the dense input.
TERM_NUM = 24
# Number of future time points predicted per sample (one label slot each).
FORECASTING_NUM = 25
# Number of distinct values a single label slot can take.
LABEL_VALUE_NUM = 4
def initHook(settings, file_list, **kwargs):
    """
    Prepare the training/testing data provider before any sample is read.

    Declares the slot layout on ``settings``: one dense vector of TERM_NUM
    values followed by FORECASTING_NUM integer label slots.

    :param settings: provider-wide object; it is later passed to the process routine.
    :param file_list: file list passed from trainer_config.py; unused here.
    :param kwargs: other arguments; unused here.
    """
    del kwargs  # unused

    settings.pool_size = sys.maxint

    # The feature is a time series of past readings, expressed as a dense
    # vector: [float, float, ..., float].
    feature_slot = dense_vector(TERM_NUM)

    # The next FORECASTING_NUM time points are predicted, and the condition
    # at each of them is one of LABEL_VALUE_NUM states.
    label_slots = [
        integer_value(LABEL_VALUE_NUM) for _ in range(FORECASTING_NUM)
    ]

    settings.slots = [feature_slot] + label_slots
@provider(
    init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
def process(settings, file_name):
    """
    Yield training/testing samples from one CSV file.

    The first line of the file is a header and is skipped. On every other
    line the first field is dropped and the remaining fields are parsed as
    integers. A window then slides over those values; each position yields
    ``[features] + labels`` where ``features`` is the list of the TERM_NUM
    values before the position (as floats) and ``labels`` are the
    FORECASTING_NUM values starting at the position, each minus one.
    Windows containing a missing label (raw value 0) are skipped.

    :param settings: provider settings object; unused here.
    :param file_name: path of the CSV file to read.
    """
    with open(file_name) as f:
        # Skip the field names. The default keeps an empty file from raising
        # StopIteration inside this generator.
        next(f, None)
        for line in f:
            speeds = [int(v) for v in line.rstrip('\r\n').split(",")[1:]]
            # Get the max index.
            end_time = len(speeds)
            # Scan and generate samples. The "+ 1" keeps the last complete
            # window, whose labels are speeds[end_time - FORECASTING_NUM:].
            for i in range(TERM_NUM, end_time - FORECASTING_NUM + 1):
                # For the dense slot.
                pre_spd = [float(v) for v in speeds[i - TERM_NUM:i]]

                # Labels must start from 0, so every value is reduced by 1.
                # A separate name is used so the window index i is not
                # shadowed by the comprehension variable.
                fol_spd = [v - 1 for v in speeds[i:i + FORECASTING_NUM]]

                # A label is missing: abandon the sample.
                if -1 in fol_spd:
                    continue
                yield [pre_spd] + fol_spd
def predict_initHook(settings, file_list, **kwargs):
    """
    Prepare the prediction data provider.

    Only the feature slot is declared: a dense vector of TERM_NUM values.
    No label slots are added.

    :param settings: provider-wide object to configure.
    :param file_list: unused here.
    :param kwargs: unused here.
    """
    settings.slots = [dense_vector(TERM_NUM)]
    settings.pool_size = sys.maxint
@provider(init_hook=predict_initHook, should_shuffle=False)
def process_predict(settings, file_name):
    """
    Yield one prediction input per data line of a CSV file.

    The first line of the file is a header and is skipped. On every other
    line the first field is dropped (as in ``process``), the remaining
    fields are parsed as integers, and the last TERM_NUM of them are yielded
    as a list of floats.

    :param settings: provider settings object; unused here.
    :param file_name: path of the CSV file to read.
    """
    with open(file_name) as f:
        # Skip the field names. The default keeps an empty file from raising
        # StopIteration inside this generator.
        next(f, None)
        for line in f:
            # Drop the first column so it can never be taken as a reading
            # when a row holds fewer than TERM_NUM + 1 fields.
            speeds = [int(v) for v in line.rstrip('\r\n').split(",")[1:]]
            # The most recent TERM_NUM readings are the model input.
            yield [float(v) for v in speeds[-TERM_NUM:]]
demo/traffic_prediction/gen_result.py
0 → 100644
浏览文件 @
411e2348
# Number of forecast steps written per row; equals the number of timestamp
# columns in `header`.
FORECASTING_NUM = 24

header = [
    'id',
    '201604200805',
    '201604200810',
    '201604200815',
    '201604200820',
    '201604200825',
    '201604200830',
    '201604200835',
    '201604200840',
    '201604200845',
    '201604200850',
    '201604200855',
    '201604200900',
    '201604200905',
    '201604200910',
    '201604200915',
    '201604200920',
    '201604200925',
    '201604200930',
    '201604200935',
    '201604200940',
    '201604200945',
    '201604200950',
    '201604200955',
    '201604201000',
]


def parse_predictions(lines):
    """
    Convert raw prediction lines into rows of output labels.

    Each line holds integers separated (and terminated) by ';'. Raw
    predictions range from 0 to 3, so 1 is added to every value. Only the
    first FORECASTING_NUM values of a line are kept so that each row has
    exactly as many values as `header` has timestamp columns.

    :param lines: iterable of text lines.
    :return: list of lists of ints, one inner list per input line.
    :raises ValueError: if a field is not an integer (including blank lines).
    """
    rows = []
    for line in lines:
        raw = [int(v) for v in line.strip('\r\n;').split(";")]
        rows.append([v + 1 for v in raw[:FORECASTING_NUM]])
    return rows


def format_row(line, labels):
    """
    Build one CSV output row.

    :param line: a data line of the input CSV; its first field is the id.
    :param labels: the label values to write after the id.
    :return: the id followed by the labels, joined with ','.
    """
    linkid = line.rstrip('\r\n').split(',')[0]
    return linkid + ',' + ','.join(map(str, labels))


def main():
    """Read ./rank-00000 and the file named in ./data/pred.list; print CSV."""
    with open('./rank-00000') as f:
        res = parse_predictions(f)

    # Only the first line is used as the path, so a list file that contains
    # several entries still resolves to a valid file name.
    with open('./data/pred.list') as f:
        file_name = f.readline().strip('\r\n')

    ###################
    ## To CSV format ##
    ###################
    with open(file_name) as f:
        next(f)  # skip the field names of the input CSV
        print(','.join(header))
        for row_num, line in enumerate(f):
            print(format_row(line, res[row_num]))


if __name__ == '__main__':
    main()
demo/traffic_prediction/predict.sh
0 → 100755
浏览文件 @
411e2348
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort as soon as any step fails.
set -e

config_file=trainer_config.py
# pass choice
model_dir="output/pass-00000"

# Run the test job in prediction mode (is_predict=1 is read by the config).
paddle train \
    --config="$config_file" \
    --use_gpu=false \
    --job=test \
    --init_model_path="$model_dir" \
    --config_args=is_predict=1 \
    --predict_output_dir=.

# gen_result.py reads ./rank-00000 and prints the result as CSV.
python gen_result.py > result.txt

# The raw prediction file is no longer needed.
rm -rf rank-00000
demo/traffic_prediction/train.sh
0 → 100755
浏览文件 @
411e2348
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failure. pipefail makes a failing `paddle train` fail
# the whole pipeline even though its output is piped through tee.
set -e
set -o pipefail

cfg=trainer_config.py

# The option --test_all_data_in_one_period=1 is intentionally not passed.
# It is noted here, outside the command, because a comment line inside a
# backslash-continued command ends the command and would detach the
# "2>&1 | tee" below from `paddle train`, leaving train.log empty.
paddle train \
    --config="$cfg" \
    --save_dir=./output \
    --trainer_count=4 \
    --log_period=1000 \
    --dot_period=10 \
    --num_passes=10 \
    --use_gpu=false \
    --show_parameter_stats_period=3000 \
    --test_wait=1 \
    2>&1 | tee 'train.log'
demo/traffic_prediction/trainer_config.py
0 → 100755
浏览文件 @
411e2348
#!/usr/bin/env python
#-*python-*-
from
paddle.trainer_config_helpers
import
*
################################### DATA Configuration #############################################
# predict.sh passes --config_args=is_predict=1; training leaves it unset.
is_predict = get_config_arg('is_predict', bool, False)

# Pick the file lists and the dataprovider entry point for the chosen mode.
if is_predict:
    trn = None
    tst = './data/pred.list'
    process = 'process_predict'
else:
    trn = './data/train.list'
    tst = './data/test.list'
    process = 'process'

define_py_data_sources2(
    train_list=trn, test_list=tst, module="dataprovider", obj=process)
################################### Parameter Configuration ########################################
# Size of the 'link_encode' input layer.
TERM_NUM = 24
# Number of prediction tasks built by the network below.
FORECASTING_NUM = 25
# Width of the shared hidden layer.
emb_size = 16
# One sample per batch when predicting, 128 otherwise.
batch_size = 1 if is_predict else 128

settings(
    learning_method=RMSPropOptimizer(),
    learning_rate=1e-3,
    batch_size=batch_size)
################################### Algorithm Configuration ########################################
output_label = []

link_encode = data_layer(name='link_encode', size=TERM_NUM)
for i in xrange(FORECASTING_NUM):
    # Suffix used in this task's layer names: 5, 10, 15, ...
    minutes = (i + 1) * 5

    # Each task share same weight.
    link_param = ParamAttr(
        name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
    link_vec = fc_layer(
        input=link_encode, size=emb_size, param_attr=link_param)
    score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())

    if not is_predict:
        # Multi-task training.
        label = data_layer(name='label_%dmin' % minutes, size=4)
        cls = classification_cost(
            input=score, name="cost_%dmin" % minutes, label=label)
        output_label.append(cls)
    else:
        # Prediction: output the index of the highest score.
        maxid = maxid_layer(score)
        output_label.append(maxid)

outputs(output_label)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录