Model trained with MPI reports "meets grpc error: OS Error" when running local inference
Created by: 333caowei
def infer(use_cuda, model_path=None):
    if model_path is None:
        print(str(model_path) + " cannot be found")
        return
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    main = fluid.Program()
    startup = fluid.Program()
    #inference_scope = fluid.core.Scope()
    #with fluid.scope_guard(inference_scope):
    with fluid.program_guard(main, startup):
        model = Model(feature_size_dict=feature_size_dict, is_sparse=True)
        model.build_model()
        [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        feeder = fluid.DataFeeder(feed_list=feed_target_names, place=place)
        feature_list = list()
        feature_str_list = list()
        for line in sys.stdin:
            line = line.strip().strip("\r\n")
            line = line.split('\t')
            query = line[1]
            query_char_id = eval(line[1].decode('utf-8'))
            pos = int(line[5])
            query_history_clicked_book_list = eval(line[13])
            query_history_cate1_ctr_str_list = eval(line[14])
            query_history_cate2_ctr_str_list = eval(line[15])
            query_history_cate3_ctr_str_list = eval(line[16])
            query_history_clicked_author_lis = eval(line[17])
            feature = [
                [query_id],
                query_char_id_list,
                [pos],
                query_history_clicked_book_id_list,
                query_history_clicked_author_id_list,
                query_history_cate1_ctr_list,
                query_history_cate2_ctr_list,
                query_history_cate3_ctr_list,
                [1]
            ]
            feature_str = [query_str, book_str]
            feature_list.append(feature)
            feature_str_list.append(feature_str)
            print feature_list
            if len(feature_list) == batch_size:
                output_list = exe.run(inference_program,
                                      feed=feeder.feed(feature_list),
                                      fetch_list=fetch_targets,
                                      return_numpy=True)
                for i, j in zip(feature_str_list, output_list[0]):
                    print '%s\t%s\t%s' % (i[0], i[1], float(j))
                feature_list = []
                feature_str_list = []
        if len(feature_list) > 0:
            output_list = exe.run(inference_program,
                                  feed=feeder.feed(feature_list),
                                  fetch_list=fetch_targets,
                                  return_numpy=True)
            for i, j, k, l in zip(feature_str_list, output_list[0], output_list[1], output_list[2]):
                print i[0]
                print j
                print k
                print l
            feature_list = []
            feature_str_list = []

if __name__ == "__main__":
    epoch_path = 'data/model3.pass1'
    infer(use_cuda=False, model_path=epoch_path)
r_speed.py
As shown in the code above, it is very simple: load the model, then feed input data to run prediction. However, it reports a grpc error. The model was trained with MPI, but this is purely local inference and has nothing to do with the cluster:
F1022 23:47:17.269440 29550 grpc_client.cc:295] Send name:[nce_w@GRAD.block12], ep:[10.109.92.18:8000] meets grpc error:OS Error
*** Check failure stack trace: ***
@ 0x7f0b938fef2d google::LogMessage::Fail()
@ 0x7f0b939029dc google::LogMessage::SendToLog()
@ 0x7f0b938fea53 google::LogMessage::Flush()
@ 0x7f0b93903eee google::LogMessageFatal::~LogMessageFatal()
@ 0x7f0b947a71e8 paddle::operators::distributed::GRPCClient::Proceed()
@ 0x7f0bcb7bb8a0 execute_native_thread_routine
@ 0x7f0bd77a4d14 start_thread
@ 0x7f0bd6dd4bfd clone
@ (nil) (unknown)
Aborted
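
For reference, here is a minimal sketch of the plain local-inference flow described above, assuming the model directory was saved with fluid.io.save_inference_model; the model_dir argument and the way the feed batch is built are placeholders, not the actual data layout from the script above:

import paddle.fluid as fluid

def local_infer(model_dir, feed_batch):
    # Pure local CPU inference sketch: assumes an inference-only model dir.
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # load_inference_model returns the inference program plus the names
        # of its feed variables and its fetch targets.
        [infer_prog, feed_names, fetch_targets] = fluid.io.load_inference_model(
            model_dir, exe)
        # feed_batch is assumed (placeholder) to be a dict mapping each feed
        # name to a numpy array or LoDTensor for variable-length inputs.
        outputs = exe.run(infer_prog,
                          feed={name: feed_batch[name] for name in feed_names},
                          fetch_list=fetch_targets,
                          return_numpy=True)
        return outputs

Under these assumptions, local_infer('data/model3.pass1', batch) would be the local counterpart of the exe.run calls in the script above.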