回归预测的实验运行出错!
Created by: water1981
输入是1100*16维的稀疏向量,每行一个样本,各列用空格分隔,第一列是label,剩下1100列是特征(每个特征写成“特征编号:1”的形式,特征编号从1开始),格式如下: 1 9:1 29:1 45:1 58:1 70:1 82:1 102:1 116:1 135:1 146:1 162:1 178:1 203:1 211:1 236:1 242:1 267:1 282:1 290:1 306:1 327:1 342:1 354:1 378:1 386:1 411:1 425:1 433:1 455:1 479:1 .....
数据提供脚本 dataprovider.py:
@provider(
    input_types=[
        sparse_binary_vector(1100 * 16),
        dense_vector(1),
    ]
)
def process(settings, file_name):
    """Yield one ``(feature_indices, [label])`` sample per data line.

    Each non-blank line of ``file_name`` is whitespace-separated: the first
    token is the label and every following token has the form
    ``index:value``, where ``index`` is a 1-based feature number.  Only the
    index is used; it is shifted to 0-based before being yielded.

    Args:
        settings: Provider settings object passed in by the framework; it is
            not used here.
        file_name: Path of the text file to read.

    Yields:
        A pair ``(feature_vector, [label])`` where ``feature_vector`` is the
        list of 0-based active feature indices and ``label`` is a float.

    Raises:
        ValueError: If a label or a feature index cannot be parsed as a
            number.
    """
    with open(file_name, 'r') as f:
        for line in f:
            # split() with no argument tolerates repeated spaces, tabs and the
            # trailing newline, so no empty tokens reach int()/float().
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            # The target feeds a dense_vector slot of a regression cost, so
            # parse it as a float rather than rejecting non-integer labels.
            label = float(tokens[0])
            feature_vector = [int(tok.split(':')[0]) - 1 for tok in tokens[1:]]
            yield feature_vector, [label]
训练配置文件 trainer_config.lr.py:
# Trainer configuration: a two-layer linear network trained with a regression
# cost on the "feature"/"label" inputs produced by dataprovider.process.

# ---- run mode --------------------------------------------------------------
is_predict = get_config_arg('is_predict', bool, False)

# ---- data sources ----------------------------------------------------------
# No training list is supplied when predicting; the test list is always set.
trn = None if is_predict else 'data/train.list'
tst = 'data/test.list'
process = 'process'
define_py_data_sources2(
    train_list=trn,
    test_list=tst,
    module="dataprovider",
    obj=process,
)

# ---- optimisation settings -------------------------------------------------
# Both branches evaluate to 1; the conditional is kept so the two modes can be
# given different batch sizes later.
batch_size = 1 if is_predict else 1
settings(
    batch_size=batch_size,
    learning_rate=2e-3,
    learning_method=AdaDeltaOptimizer(),
)

# drop_rate is assigned but not referenced anywhere else in this config.
drop_rate = 0.5
default_decay_rate(1e-4 * batch_size)
# hidden_dim only feeds the default initial std below; the layers that follow
# pass their own initial_std explicitly.
hidden_dim = 512
default_initial_std(1 / math.sqrt(hidden_dim))

# ---- network ---------------------------------------------------------------
data = data_layer(name="feature", size=1100 * 16)
score = data_layer(name="label", size=1)

layer1 = fc_layer(
    input=data,
    size=256,
    act=LinearActivation(),
    param_attr=ParameterAttribute(
        name='_fc1_w_',
        initial_mean=0,
        initial_std=1 / math.sqrt(256),
    ),
)
output = fc_layer(
    input=layer1,
    size=1,
    act=LinearActivation(),
    param_attr=ParameterAttribute(
        name='_predict_w',
        initial_mean=0,
        initial_std=1 / math.sqrt(2),
    ),
)

# ---- outputs ---------------------------------------------------------------
# Prediction exposes the raw network output; training exposes the regression
# cost computed against the label layer.
if is_predict:
    outputs(output)
else:
    outputs(regression_cost(input=output, label=score))
训练脚本 train.sh:
# Launch PaddlePaddle training with trainer_config.lr.py and mirror all
# output (stdout and stderr) into train.log.
#
# -e alone is not enough here: the trainer is piped into tee, so the
# pipeline's exit status would be tee's and a failing trainer would look
# like success.  pipefail makes the pipeline fail when the trainer fails.
set -eo pipefail

cfg=trainer_config.lr.py

# NOTE(review): the config sets batch_size=1 while --trainer_count=4 is used
# below; confirm that a batch smaller than the trainer count is supported.
/home/img/liushui/bin/paddle train \
    --config="$cfg" \
    --save_dir=./output \
    --trainer_count=4 \
    --log_period=20 \
    --num_passes=15 \
    --use_gpu=false \
    --show_parameter_stats_period=1 \
    --dot_period=1 \
    --test_all_data_in_one_period=1 \
    2>&1 | tee 'train.log'
运行 nohup bash train.sh & 后,一直停在初始化的提示上面:
I /home/img/baidu/idl/paddle/paddle/utils/Util.cpp:138] commandline: /home/img/liushui/bin/../opt/paddle/bin/paddle_trainer --config=trainer_config.lr.py --save_dir=./output --trainer_count=4 --log_period=20 --num_passes=15 --use_gpu=false --show_parameter_stats_period=1 --dot_period=1 --test_all_data_in_one_period=1
I /home/img/baidu/idl/paddle/paddle/utils/Util.cpp:113] Calling runInitFunctions
I /home/img/baidu/idl/paddle/paddle/utils/Util.cpp:126] Call runInitFunctions done.
[INFO 2016-12-27 16:22:02,662 networks.py:1122] The input order is [feature, label]
[INFO 2016-12-27 16:22:02,662 networks.py:1129] The output order is [__regression_cost_0__]
I /home/img/baidu/idl/paddle/paddle/trainer/Trainer.cpp:169] trainer mode: Normal
I /home/img/baidu/idl/paddle/paddle/gserver/dataproviders/PyDataProvider2.cpp:247] loading dataprovider dataprovider::process
I /home/img/baidu/idl/paddle/paddle/gserver/dataproviders/PyDataProvider2.cpp:247] loading dataprovider dataprovider::process
I /home/img/baidu/idl/paddle/paddle/gserver/gradientmachines/GradientMachine.cpp:134] Initing parameters..
I /home/img/baidu/idl/paddle/paddle/gserver/gradientmachines/GradientMachine.cpp:141] Init parameters done.