ValueError when saving a detection model
Created by: qjing666
A ValueError is raised ("var im_id not in this block") when saving a detection model, even though "im_id" exists in the trainer main program.
fl_trainer.py:
# Reproduction script: loads the trainer job from "fl_job_config", starts the
# FL trainer, builds a DataFeeder for the detection inputs, then trains for at
# most 10 epochs and calls save_inference_program when trainer_id == 0.
# NOTE(review): the pasted snippet lost its line breaks and indentation; the
# layout below is reconstructed from statement order. The nesting of the
# per-step print and of the save step is an assumption — confirm against the
# real fl_trainer.py.
# `trainer_id` is defined outside the visible snippet.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091" # Inform scheduler IP address to trainer
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
# NOTE(review): the trainer is started on CUDAPlace(trainer_id+1) while the
# feeder below uses CUDAPlace(trainer_id) — confirm the device mismatch is
# intended.
trainer.start(fluid.CUDAPlace(trainer_id+1))
# NOTE(review): test_program is not used anywhere in the visible snippet.
test_program = trainer._main_program.clone(for_test=True)

# Feed variables for the detection model. "im_id" is declared here as an int64
# input; it is the variable named in the reported ValueError.
image = fluid.layers.data(name='image', shape=[3, None, None], dtype = 'float32',lod_level=0)
im_info = fluid.layers.data(name='im_info', shape=[None, 3], dtype = 'float32',lod_level=0)
im_id = fluid.layers.data(name='im_id', shape=[None, 1], dtype = 'int64',lod_level=0)
gt_bbox = fluid.layers.data(name='gt_bbox', shape=[None,4], dtype = 'float32', lod_level=1)
gt_class = fluid.layers.data(name='gt_class', shape=[None,1], dtype = 'int32', lod_level=1)
is_crowd = fluid.layers.data(name='is_crowd', shape=[None,1], dtype = 'int32', lod_level=1)
place = fluid.CUDAPlace(trainer_id)
feeder = fluid.DataFeeder(feed_list=[image, im_info, im_id, gt_bbox, gt_class, is_crowd], place=place)

output_folder = "model_node%d" % trainer_id
epoch_id = 0
step = 0

while not trainer.stop():
    epoch_id += 1
    # Hard cap of 10 epochs regardless of what trainer.stop() reports.
    if epoch_id > 10:
        break
    print("epoch %d start train" % (epoch_id))
    # A fresh reader and loader are created at the start of every epoch.
    test_class = TestReader()
    data_loader = test_class.test_loader()
    for step_id, data in enumerate(data_loader):
        # The fetched 'sum_0.tmp_0' value is stored in `acc` but printed as
        # "loss" below.
        acc = trainer.run(feeder.feed(data), fetch=['sum_0.tmp_0'])
        step += 1
        print("step: {}, loss: {}".format(step, acc))
    if trainer_id == 0:
        # NOTE(review): save_dir is computed but never used; output_folder is
        # what gets passed to save_inference_program. Presumably this is the
        # call that raises "var im_id not in this block" — confirm with the
        # traceback.
        save_dir = (output_folder + "/epoch_%d") % epoch_id
        trainer.save_inference_program(output_folder)
Excerpt from the trainer main program:
vars { name: "im_id" type { type: LOD_TENSOR lod_tensor { tensor { data_type: INT64 dims: -1 dims: 1 } lod_level: 0 } } persistable: false need_check_feed: true }