提交 a74f5365 编写于 作者: Y Yu Yang

Format code

上级 82bee14d
...@@ -18,6 +18,8 @@ import numpy as np ...@@ -18,6 +18,8 @@ import numpy as np
TERM_NUM = 24 TERM_NUM = 24
FORECASTING_NUM = 25 FORECASTING_NUM = 25
LABEL_VALUE_NUM = 4 LABEL_VALUE_NUM = 4
def initHook(settings, file_list, **kwargs): def initHook(settings, file_list, **kwargs):
""" """
The init hook is invoked before the data is processed. It sets obj.slots and stores the data metadata. The init hook is invoked before the data is processed. It sets obj.slots and stores the data metadata.
...@@ -38,40 +40,43 @@ def initHook(settings, file_list, **kwargs): ...@@ -38,40 +40,43 @@ def initHook(settings, file_list, **kwargs):
for i in range(FORECASTING_NUM): for i in range(FORECASTING_NUM):
settings.slots.append(integer_value(LABEL_VALUE_NUM)) settings.slots.append(integer_value(LABEL_VALUE_NUM))
@provider(init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
@provider(
init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
def process(settings, file_name): def process(settings, file_name):
with open(file_name) as f: with open(file_name) as f:
# skip the header row of field names # skip the header row of field names
f.next() f.next()
for row_num, line in enumerate(f): for row_num, line in enumerate(f):
speeds = map(int,line.rstrip('\r\n').split(",")[1:]) speeds = map(int, line.rstrip('\r\n').split(",")[1:])
# Get the max index. # Get the max index.
end_time = len(speeds) end_time = len(speeds)
# Scanning and generating samples # Scanning and generating samples
for i in range(TERM_NUM,end_time - FORECASTING_NUM): for i in range(TERM_NUM, end_time - FORECASTING_NUM):
# For dense slot # For dense slot
pre_spd = map(float,speeds[i-TERM_NUM:i]) pre_spd = map(float, speeds[i - TERM_NUM:i])
# Integer labels to be predicted; label values must start from 0, so subtract 1 from each. # Integer labels to be predicted; label values must start from 0, so subtract 1 from each.
fol_spd = [i-1 for i in speeds[i:i + FORECASTING_NUM]] fol_spd = [i - 1 for i in speeds[i:i + FORECASTING_NUM]]
# A label to be predicted is missing; discard the sample. # A label to be predicted is missing; discard the sample.
if -1 in fol_spd: if -1 in fol_spd:
continue continue
yield [pre_spd] + fol_spd yield [pre_spd] + fol_spd
def predict_initHook(settings, file_list, **kwargs): def predict_initHook(settings, file_list, **kwargs):
settings.pool_size = sys.maxint settings.pool_size = sys.maxint
settings.slots = [dense_vector(TERM_NUM)] settings.slots = [dense_vector(TERM_NUM)]
@provider(init_hook=predict_initHook,should_shuffle=False)
@provider(init_hook=predict_initHook, should_shuffle=False)
def process_predict(settings, file_name): def process_predict(settings, file_name):
with open(file_name) as f: with open(file_name) as f:
# skip the header row of field names # skip the header row of field names
f.next() f.next()
for row_num, line in enumerate(f): for row_num, line in enumerate(f):
speeds = map(int,line.rstrip('\r\n').split(",")) speeds = map(int, line.rstrip('\r\n').split(","))
end_time = len(speeds) end_time = len(speeds)
pre_spd = map(float,speeds[end_time-TERM_NUM:end_time]) pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
yield pre_spd yield pre_spd
res = [] res = []
with open('./rank-00000') as f: with open('./rank-00000') as f:
for line in f: for line in f:
pred = map(int,line.strip('\r\n;').split(";")) pred = map(int, line.strip('\r\n;').split(";"))
# raw predictions range from 0 to 3 # raw predictions range from 0 to 3
res.append([i+1 for i in pred]) res.append([i + 1 for i in pred])
file_name = open('./data/pred.list').read().strip('\r\n') file_name = open('./data/pred.list').read().strip('\r\n')
FORECASTING_NUM=24 FORECASTING_NUM = 24
header=['id', header = [
'id',
'201604200805', '201604200805',
'201604200810', '201604200810',
'201604200815', '201604200815',
...@@ -33,7 +34,7 @@ header=['id', ...@@ -33,7 +34,7 @@ header=['id',
'201604200950', '201604200950',
'201604200955', '201604200955',
'201604201000', '201604201000',
] ]
################### ###################
## To CSV format ## ## To CSV format ##
################### ###################
...@@ -43,5 +44,4 @@ with open(file_name) as f: ...@@ -43,5 +44,4 @@ with open(file_name) as f:
for row_num, line in enumerate(f): for row_num, line in enumerate(f):
fields = line.rstrip('\r\n').split(',') fields = line.rstrip('\r\n').split(',')
linkid = fields[0] linkid = fields[0]
print linkid+','+','.join(map(str,res[row_num])) print linkid + ',' + ','.join(map(str, res[row_num]))
...@@ -2,26 +2,22 @@ ...@@ -2,26 +2,22 @@
#-*python-*- #-*python-*-
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
################################### DATA Configuration ############################################# ################################### DATA Configuration #############################################
is_predict = get_config_arg('is_predict', bool, False) is_predict = get_config_arg('is_predict', bool, False)
trn = './data/train.list' if not is_predict else None trn = './data/train.list' if not is_predict else None
tst = './data/test.list' if not is_predict else './data/pred.list' tst = './data/test.list' if not is_predict else './data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn, test_list=tst, module="dataprovider", obj=process)
module="dataprovider",
obj=process)
################################### Parameter Configuration ####################################### ################################### Parameter Configuration #######################################
TERM_NUM=24 TERM_NUM = 24
FORECASTING_NUM= 25 FORECASTING_NUM = 25
emb_size=16 emb_size = 16
batch_size=128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
batch_size = batch_size, batch_size=batch_size,
learning_rate = 1e-3, learning_rate=1e-3,
learning_method = RMSPropOptimizer() learning_method=RMSPropOptimizer())
)
################################### Algorithm Configuration ######################################## ################################### Algorithm Configuration ########################################
output_label = [] output_label = []
...@@ -29,15 +25,17 @@ output_label = [] ...@@ -29,15 +25,17 @@ output_label = []
link_encode = data_layer(name='link_encode', size=TERM_NUM) link_encode = data_layer(name='link_encode', size=TERM_NUM)
for i in xrange(FORECASTING_NUM): for i in xrange(FORECASTING_NUM):
# All tasks share the same weight. # All tasks share the same weight.
link_param = ParamAttr(name='_link_vec.w', initial_max=1.0, initial_min=-1.0) link_param = ParamAttr(
link_vec = fc_layer(input=link_encode,size=emb_size, param_attr=link_param) name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation()) score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(score) maxid = maxid_layer(score)
output_label.append(maxid) output_label.append(maxid)
else: else:
# Multi-task training. # Multi-task training.
label = data_layer(name='label_%dmin'%((i+1)*5), size=4) label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
cls = classification_cost(input=score,name="cost_%dmin"%((i+1)*5), label=label) cls = classification_cost(
input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
output_label.append(cls) output_label.append(cls)
outputs(output_label) outputs(output_label)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册