diff --git a/demo/traffic_prediction/dataprovider.py b/demo/traffic_prediction/dataprovider.py index bea0259b031f43dc946f3eb17db8a8cd4e45a79c..b91506726076c12d574eb0aba283fb59071503ef 100644 --- a/demo/traffic_prediction/dataprovider.py +++ b/demo/traffic_prediction/dataprovider.py @@ -18,6 +18,8 @@ import numpy as np TERM_NUM = 24 FORECASTING_NUM = 25 LABEL_VALUE_NUM = 4 + + def initHook(settings, file_list, **kwargs): """ Init hook is invoked before process data. It will set obj.slots and store data meta. @@ -27,8 +29,8 @@ def initHook(settings, file_list, **kwargs): :param file_list: the meta file object, which passed from trainer_config.py,but unused in this function. :param kwargs: unused other arguments. """ - del kwargs #unused - + del kwargs #unused + settings.pool_size = sys.maxint #Use a time seires of the past as feature. #Dense_vector's expression form is [float,float,...,float] @@ -38,40 +40,43 @@ def initHook(settings, file_list, **kwargs): for i in range(FORECASTING_NUM): settings.slots.append(integer_value(LABEL_VALUE_NUM)) -@provider(init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True) + +@provider( + init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True) def process(settings, file_name): with open(file_name) as f: #abandon fields name f.next() - for row_num, line in enumerate(f): - speeds = map(int,line.rstrip('\r\n').split(",")[1:]) + for row_num, line in enumerate(f): + speeds = map(int, line.rstrip('\r\n').split(",")[1:]) # Get the max index. end_time = len(speeds) # Scanning and generating samples - for i in range(TERM_NUM,end_time - FORECASTING_NUM): + for i in range(TERM_NUM, end_time - FORECASTING_NUM): # For dense slot - pre_spd = map(float,speeds[i-TERM_NUM:i]) + pre_spd = map(float, speeds[i - TERM_NUM:i]) # Integer value need predicting, values start from 0, so every one minus 1. - fol_spd = [i-1 for i in speeds[i:i + FORECASTING_NUM]] - + fol_spd = [i - 1 for i in speeds[i:i + FORECASTING_NUM]] + # Predicting label is missing, abandon the sample. if -1 in fol_spd: continue yield [pre_spd] + fol_spd + def predict_initHook(settings, file_list, **kwargs): settings.pool_size = sys.maxint settings.slots = [dense_vector(TERM_NUM)] -@provider(init_hook=predict_initHook,should_shuffle=False) + +@provider(init_hook=predict_initHook, should_shuffle=False) def process_predict(settings, file_name): with open(file_name) as f: #abandon fields name f.next() for row_num, line in enumerate(f): - speeds = map(int,line.rstrip('\r\n').split(",")) + speeds = map(int, line.rstrip('\r\n').split(",")) end_time = len(speeds) - pre_spd = map(float,speeds[end_time-TERM_NUM:end_time]) + pre_spd = map(float, speeds[end_time - TERM_NUM:end_time]) yield pre_spd - diff --git a/demo/traffic_prediction/gen_result.py b/demo/traffic_prediction/gen_result.py index 78e5bd700335b842a4b8d87e1ec4889c7430e09d..cb8f6e68322cc27031ab58b67e1763ba3bd337ee 100644 --- a/demo/traffic_prediction/gen_result.py +++ b/demo/traffic_prediction/gen_result.py @@ -1,39 +1,40 @@ res = [] with open('./rank-00000') as f: for line in f: - pred = map(int,line.strip('\r\n;').split(";")) + pred = map(int, line.strip('\r\n;').split(";")) #raw prediction range from 0 to 3 - res.append([i+1 for i in pred]) + res.append([i + 1 for i in pred]) file_name = open('./data/pred.list').read().strip('\r\n') -FORECASTING_NUM=24 -header=['id', - '201604200805', - '201604200810', - '201604200815', - '201604200820', - '201604200825', - '201604200830', - '201604200835', - '201604200840', - '201604200845', - '201604200850', - '201604200855', - '201604200900', - '201604200905', - '201604200910', - '201604200915', - '201604200920', - '201604200925', - '201604200930', - '201604200935', - '201604200940', - '201604200945', - '201604200950', - '201604200955', - '201604201000', - ] +FORECASTING_NUM = 24 +header = [ + 'id', + '201604200805', + '201604200810', + '201604200815', + '201604200820', + '201604200825', + '201604200830', + '201604200835', + '201604200840', + '201604200845', + '201604200850', + '201604200855', + '201604200900', + '201604200905', + '201604200910', + '201604200915', + '201604200920', + '201604200925', + '201604200930', + '201604200935', + '201604200940', + '201604200945', + '201604200950', + '201604200955', + '201604201000', +] ################### ## To CSV format ## ################### @@ -43,5 +44,4 @@ with open(file_name) as f: for row_num, line in enumerate(f): fields = line.rstrip('\r\n').split(',') linkid = fields[0] - print linkid+','+','.join(map(str,res[row_num])) - + print linkid + ',' + ','.join(map(str, res[row_num])) diff --git a/demo/traffic_prediction/trainer_config.py b/demo/traffic_prediction/trainer_config.py index 835b1d688cd1ac02ca8a3e1356624c3c9fefee1d..c8755f7f3c28624e9825ba136609f454e4d1c236 100755 --- a/demo/traffic_prediction/trainer_config.py +++ b/demo/traffic_prediction/trainer_config.py @@ -2,26 +2,22 @@ #-*python-*- from paddle.trainer_config_helpers import * - ################################### DATA Configuration ############################################# is_predict = get_config_arg('is_predict', bool, False) trn = './data/train.list' if not is_predict else None tst = './data/test.list' if not is_predict else './data/pred.list' process = 'process' if not is_predict else 'process_predict' -define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider", - obj=process) +define_py_data_sources2( + train_list=trn, test_list=tst, module="dataprovider", obj=process) ################################### Parameter Configuaration ####################################### -TERM_NUM=24 -FORECASTING_NUM= 25 -emb_size=16 -batch_size=128 if not is_predict else 1 +TERM_NUM = 24 +FORECASTING_NUM = 25 +emb_size = 16 +batch_size = 128 if not is_predict else 1 settings( - batch_size = batch_size, - learning_rate = 1e-3, - learning_method = RMSPropOptimizer() -) + batch_size=batch_size, + learning_rate=1e-3, + learning_method=RMSPropOptimizer()) ################################### Algorithm Configuration ######################################## output_label = [] @@ -29,15 +25,17 @@ output_label = [] link_encode = data_layer(name='link_encode', size=TERM_NUM) for i in xrange(FORECASTING_NUM): # Each task share same weight. - link_param = ParamAttr(name='_link_vec.w', initial_max=1.0, initial_min=-1.0) - link_vec = fc_layer(input=link_encode,size=emb_size, param_attr=link_param) + link_param = ParamAttr( + name='_link_vec.w', initial_max=1.0, initial_min=-1.0) + link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param) score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation()) if is_predict: maxid = maxid_layer(score) output_label.append(maxid) else: # Multi-task training. - label = data_layer(name='label_%dmin'%((i+1)*5), size=4) - cls = classification_cost(input=score,name="cost_%dmin"%((i+1)*5), label=label) + label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4) + cls = classification_cost( + input=score, name="cost_%dmin" % ((i + 1) * 5), label=label) output_label.append(cls) outputs(output_label)