Error message: `holder_ should not be null. Tensor not initialized yet when Tensor::type() is called.`
Created by: superwj1990
def build_program(program, startup, is_train):
    """Append the model graph to *program* (guarded by *startup*).

    Args:
        program: fluid.Program the layers/ops are appended to.
        startup: matching startup program for parameter initialization.
        is_train (bool): True  -> build the training graph and return
                                  (scale_loss, acc, global_lr);
                         False -> build the feature-extraction graph and
                                  return the fc output of model.net().
    """
    # NOTE(review): both locals below are computed but never used in this
    # function — confirm against the full file before removing the calls.
    trainer_count = args.dist_env["num_trainers"]
    device_num_per_worker = get_device_num()
    # Deal with image shape.
    with fluid.program_guard(main_program=program, startup_program=startup):
        with fluid.unique_name.guard():
            image_shape = [3, config['train']['inpsize'],
                           config['train']['inpsize']]
            image = fluid.layers.data(name='image', shape=image_shape,
                                      dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            iter = fluid.layers.data(name='iter', shape=[1], dtype='float32')
            if args.fp16:
                image = fluid.layers.cast(image, "float16")
            # BUG(paste?): the original read `models.dictargs.model`, which is
            # not valid attribute access; the conventional fluid idiom (and the
            # likely pre-markdown text) is models.__dict__[args.model]() —
            # restored here. TODO confirm against the original script.
            model = models.__dict__[args.model]()
            if is_train:
                model.set_extract_feature_flag(False)
                loss, acc = model.net(image, label, iter)
                # Loss scaling is only meaningful for fp16 training.
                if args.scale_loss > 1:
                    scale_loss = loss * float(args.scale_loss)
                else:
                    scale_loss = loss
                model.set_iter(np.array(fluid.layers.reduce_mean(input=iter)))
                optimizer = create_optimizer(model.params, args.lr)
                if args.fp16:
                    params_grads = optimizer.backward(scale_loss)
                    # BUG: the original passed the outer globals
                    # `main_program`/`startup_program` here instead of the
                    # guarded `program`/`startup` parameters, so master-param
                    # ops could land in the wrong program. Use the parameters.
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, program, startup, args.scale_loss)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads,
                                                      params_grads, program)
                else:
                    optimizer.minimize(scale_loss)
                global_lr = optimizer._global_learning_rate()
                train_out = (scale_loss, acc, global_lr)
                return train_out
            else:
                model.set_extract_feature_flag(True)
                fc = model.net(image, label, iter)
                return fc
# Build the train and eval graphs; they share one startup program.
train_out = build_program(train_program, startup_program, True)
test_out = build_program(test_program, startup_program, False)

# BUG: memory_optimize's skip_opt_set expects *variable names* (strings).
# Passing Variable objects means the fetch targets are NOT protected and can
# be reused/optimized away — which surfaces later as the reported
# "holder_ should not be null / Tensor not initialized yet" crash.
# TODO(review): confirm the fluid version in use; newer versions accept
# Variables, older ones require names.
fluid.memory_optimize(train_program,
                      skip_opt_set=set(v.name for v in train_out))

if args.update_method == "pserver":
    # NOTE(review): memory_optimize was applied to train_program *before*
    # the pserver transpilation below — verify that ordering is supported
    # by this fluid release; it is a known source of uninitialized-tensor
    # errors on the pserver side.
    train_program, startup_program = pserver_prepare(args, train_program,
                                                     startup_program)
elif args.update_method == "nccl2":
    nccl2_prepare(args, startup_program)

if args.dist_env["training_role"] == "PSERVER":
    run_pserver(train_program, startup_program)
    exit(0)
Traceback (most recent call last): File "train_classifier_distributed_version.py", line 425, in main() File "train_classifier_distributed_version.py", line 313, in main run_pserver(train_program, startup_program) File "train_classifier_distributed_version.py", line 162, in run_pserver server_exe.run(train_prog) File "/home/ssd2/wangjian/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/executor.py", line 525, in run use_program_cache=use_program_cache) File "/home/ssd2/wangjian/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/executor.py", line 591, in run exe.run(program.desc, scope, 0, True, True) paddle.fluid.core.EnforceNotMet: Invoke operator conv2d error. Python Callstacks: File "/home/ssd2/wangjian/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1317, in append_op attrs=kwargs.get("attrs", None)) File "/home/ssd2/wangjian/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 56, in append_op return self.main_program.current_block().append_op(*args, **kwargs) File "/home/ssd2/wangjian/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/layers/nn.py", line 1976, in conv2d 'fuse_relu_before_depthwise_conv': False File "../models/sphere_resnet_128.py", line 113, in conv_bn_layer bias_attr=False) File "../models/sphere_resnet_128.py", line 58, in net input=input, num_filters=64, filter_size=5, stride=2, act='relu') File "train_classifier_distributed_version.py", line 277, in build_program loss, acc = model.net(image, label, iter) File "train_classifier_distributed_version.py", line 303, in main train_out = build_program(train_program, startup_program, True) File "train_classifier_distributed_version.py", line 425, in main() C++ Callstacks: holder_ should not be null. Tensor not initialized yet when Tensor::type() is called. at [/paddle/paddle/fluid/framework/tensor.h:145] PaddlePaddle Call Stacks: