本地predict错误
Created by: sjtuwy
错误信息如下:
+ model=./gender_sparse_model/pass-00019
+ config=./trainer_config_gender_sparse.cluster.py
+ predict_data=./gender_sparse_data/20161112
+ python predict.py -n ./trainer_config_gender_sparse.cluster.py -w ./gender_sparse_model/pass-00019 -i ./gender_sparse_data/20161112
I1221 17:59:07.502112 34343 Util.cpp:158] commandline: --use_gpu=0
I1221 17:59:07.502246 34343 Util.cpp:132] Calling runInitFunctions
I1221 17:59:07.502578 34343 Util.cpp:146] Call runInitFunctions done.
Traceback (most recent call last):
File "<string>", line 13, in <module>
NameError: name 'GLOG_logtostderr' is not defined
[INFO 2016-12-21 17:59:07,525 networks.py:1466] The input order is [input_fea]
[INFO 2016-12-21 17:59:07,525 networks.py:1472] The output order is [__fc_layer_6__]
I1221 17:59:12.760416 34343 GradientMachine.cpp:124] Loading parameters from ./gender_sparse_model/pass-00019
Traceback (most recent call last):
File "predict.py", line 107, in <module>
main()
File "predict.py", line 104, in main
predict.predict_onebyone(data)
File "predict.py", line 77, in predict_onebyone
input = self.converter.convert([[fea]])
File "/home/disk0/it/paddle/paddle_internal_release_tools/idl/paddle/output/python27-gcc482/lib/python2.7/site-packages/py_paddle/dataprovider_converter.py", line 152, in convert
scanner.finish_scan(argument)
File "/home/disk0/it/paddle/paddle_internal_release_tools/idl/paddle/output/python27-gcc482/lib/python2.7/site-packages/py_paddle/dataprovider_converter.py", line 78, in finish_scan
m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
File "/home/disk0/it/paddle/paddle_internal_release_tools/idl/paddle/output/python27-gcc482/lib/python2.7/site-packages/py_paddle/swig_paddle.py", line 680, in sparseCopyFrom
return _swig_paddle.Matrix_sparseCopyFrom(self, *args)
NotImplementedError: Wrong number or type of arguments for overloaded function 'Matrix_sparseCopyFrom'.
Possible C/C++ prototypes are:
Matrix::sparseCopyFrom(std::vector< int,std::allocator< int > > const &,std::vector< int,std::allocator< int > > const &,std::vector< float,std::allocator< float > > const &)
Matrix::sparseCopyFrom(std::vector< int,std::allocator< int > > const &,std::vector< int,std::allocator< int > > const &)
麻烦协助排查下原因,谢谢。 附: predict.sh:
model=./gender_sparse_model/pass-00019
config=./trainer_config_gender_sparse.cluster.py
predict_data=./gender_sparse_data/20161112
python predict.py \
-n $config\
-w $model \
-i $predict_data \
> ./gender_sparse_data/20161112.res
predict.py:
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.config_parser import parse_config
from paddle.trainer.PyDataProvider2 import sparse_binary_vector
"""
Usage: run following command to show help message.
python predict.py -h
"""
fea_map = {}
for line in open('./sparse_fea_entropy.20161112.dict', 'r'):
l_s = line.rstrip('\n\r').split('\t')
fea_map[int(l_s[0])] = int(l_s[1])
class ModelPrediction():
def __init__(self, train_conf, model_dir=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.train_conf = train_conf
self.model_dir = model_dir
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
self.network.loadParameters(self.model_dir)
slots = [sparse_binary_vector(5000001)]
self.converter = DataProviderConverter(slots)
def predict_onebyone(self, data_file):
import sys
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name.
"""
if data_file is None:
f = sys.stdin
else:
f = open(data_file)
while True:
line = f.readline()
if len(line) == 0:
break
try:
l_s = line.rstrip('\n').split()
user = l_s[0]
target = l_s[1]
feature = l_s[2:]
except:
print >> sys.stderr, line,
continue
try:
fea = [int(i.split(':')[0]) for i in feature]
except Exception, e:
print >> sys.stderr, "fetch data failed",e
continue
input = self.converter.convert([[fea]])
output = self.network.forwardTest(input)
prediction = output[0]["value"][0][0]
print_str = user+"\t"+target+"\t"+str(prediction)
print print_str
def option_parser():
usage = "python predict.py -n config -w model_dir -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option("-n", "--tconf", action="store",
dest="train_conf", help="network config")
parser.add_option("-i", "--data", action="store",
dest="data", help="data file to predict")
parser.add_option("-w", "--model", action="store",
dest="model_path", default=None,
help="model path")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
data = options.data
model_path = options.model_path
swig_paddle.initPaddle("--use_gpu=0")
predict = ModelPrediction(train_conf, model_path)
predict.predict_onebyone(data)
trainer_config_gender_sparse.cluster.py
from paddle.trainer_config_helpers import *
import datetime
cluster_config(
fs_name = "xxx",
fs_ugi = "xxx",
output_path="xxx")),
train_data_path="xxx",
test_data_path="xxx",
use_remote_sparse=True,
)
is_predict = get_config_arg("is_predict", bool, False)
input_dim = 5000001
num_classes = 2
####################Data Configuration ##################
if not is_predict:
data_dir='data/'
define_py_data_sources2(train_list='train.list',
test_list=None,
module='ei_fea_provider_gender_sparse',
obj='processData')
settings(
batch_size = 500,
learning_method = AdaGradOptimizer(),
)
input = data_layer(name='input_fea', size=input_dim)
label = data_layer(name="label", size=num_classes)
#emb = embedding_layer(input=input, size=256, param_attr=ParamAttr(sparse_update=True))
#emb_sum = pooling_layer(input=emb, pooling_type=SumPooling())
hidden = fc_layer(input=input, size=256, act=ReluActivation(), param_attr=ParamAttr(sparse_update=True,l1_rate=0.1))
hidden = fc_layer(input=hidden, size=256, act=ReluActivation())
hidden = fc_layer(input=hidden, size=128, act=ReluActivation())
hidden = fc_layer(input=hidden, size=64, act=ReluActivation())
hidden = fc_layer(input=hidden, size=32, act=ReluActivation())
hidden = fc_layer(input=hidden, size=16, act=ReluActivation())
if not is_predict:
prediction = fc_layer(input=hidden, size=num_classes, act=SoftmaxActivation())
outputs(classification_cost(input=prediction,
label=label,
evaluator=[precision_recall_evaluator, classification_error_evaluator, auc_evaluator]))
for i in range(num_classes):
precision_recall_evaluator(name="PreRec of label [{0}]".format(i), input=prediction, label=label, positive_label=i)
else:
prediction = fc_layer(input=hidden, size=num_classes, act=SoftmaxActivation())
outputs([prediction])
样本例子:
user_id label0 962870217988607:3 985761982809793:3 838052250556224:2 3096226547944201:11