yolov3剪枝训练时,优化器报错
Created by: sucuicong
训练部分主要代码如下:
def train():
logger.info("start train YOLOv3, train params:%s", str(train_parameters))
logger.info("create place, use gpu:" + str(train_parameters['use_gpu']))
logger.info("build network and program")
place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
train_program = fluid.Program()
start_program = fluid.Program()
test_program = fluid.Program()
feeder, reader, loss = build_program_with_feeder(train_program, start_program, place)
pred = build_program_with_feeder(test_program, start_program, istrain=False)
test_program = test_program.clone(for_test=True)
train_fetch_list = [loss.name]
exe.run(start_program, scope=scope)
load_pretrained_params(exe, train_program)
if train_parameters['print_params']:
param_delimit_str = '-' * 20 + "All parameters in current graph" + '-' * 20
print(param_delimit_str)
for block in train_program.blocks:
for param in block.all_parameters():
print("parameter name: {}\tshape: {}".format(param.name,
param.shape))
print('-' * len(param_delimit_str))
pruned_params = train_parameters['pruned_params'].strip().split(",")
logger.info("pruned params: {}".format(pruned_params))
pruned_ratios = [float(n) for n in train_parameters['pruned_ratios'].strip().split(",")]
logger.info("pruned ratios: {}".format(pruned_ratios))
logger.info("build executor and init params")
pruner = Pruner()
train_program = pruner.prune(
train_program,
scope,
params=pruned_params,
ratios=pruned_ratios,
place=place,
only_graph=False)[0]
base_flops = flops(test_program)
test_program = pruner.prune(
test_program,
scope,
params=pruned_params,
ratios=pruned_ratios,
place=place,
only_graph=True)[0]
pruned_flops = flops(test_program)
stop_strategy = train_parameters['early_stop']
rise_limit = stop_strategy['rise_limit']
min_loss = stop_strategy['min_loss']
# stop_train = False
rise_count = 0
total_batch_count = 0
current_best_f1 = 0.0
train_temp_loss = 0
current_best_pass = 0
current_best_box_pass = 0
current_best_recall = 0
current_best_precision = 0
current_best_box_recall = 0
current_best_box_precision = 0
current_best_box_f1 = 0
for pass_id in range(train_parameters["num_epochs"]):
logger.info("current pass: {}, start read image".format(pass_id))
batch_id = 0
total_loss = 0.0
for batch_id, data in enumerate(reader()):
t1 = time.time()
loss = exe.run(train_program, feed=feeder.feed(data), fetch_list=train_fetch_list)
period = time.time() - t1
loss = np.mean(np.array(loss))
total_loss += loss
batch_id += 1
total_batch_count += 1
if batch_id % 200 == 0:
logger.info("pass {}, trainbatch {}, loss {} time {}".format(pass_id,
batch_id, loss, "%2.2f sec" % period))
pass_mean_loss = total_loss / batch_id
logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))
logger.info("end training")`
######################################################### #####################报错信息如下:
Python Call Stacks (More useful to users):
------------------------------------------
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/framework.py", line 2594, in _prepend_op
attrs=kwargs.get("attrs", None))
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/nn.py", line 5472, in autoincreased_step_counter
attrs={'step': float(step)})
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/learning_rate_scheduler.py", line 48, in _decay_step_counter
counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/learning_rate_scheduler.py", line 387, in piecewise_decay
global_step = _decay_step_counter()
File "train.py", line 265, in optimizer_momentum_setting
learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
File "train.py", line 371, in get_loss
optimizer = optimizer_momentum_setting()
File "train.py", line 306, in build_program_with_feeder
loss = get_loss(model, outputs, gt_box, gt_label, main_prog)
File "train.py", line 403, in train
feeder, reader, loss = build_program_with_feeder(train_program, start_program, place)
File "train.py", line 544, in <module>
train()
----------------------
Error Message Summary:
----------------------
InvalidArgumentError: The Tensor in the increment Op's Input Variable X(@LR_DECAY_COUNTER@) is not initialized.
[Hint: Expected t->IsInitialized() == true, but received t->IsInitialized():0 != true:1.] at (/paddle/paddle/fluid/framework/operator.cc:1264)
[operator < increment > error]