Training and test run on the same batch of data, but the training error and test error differ
Created by: Dely-Yu
Training and test use the same batch of data; the training error is around 0.03, but the test error is around 3.4.
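For reference, a minimal way to compare the two programs on one identical batch, using the values returned by train_net below (the executor exe and the feed dict feed are illustrative placeholders, not part of the original code):

# Hedged sketch: fetch sum_cost from both the training program and the
# cloned inference program on the same feed; a large gap points at ops
# that behave differently in test mode (e.g. dropout / batch norm).
train_cost, = exe.run(fluid.default_main_program(), feed=feed, fetch_list=[sum_cost])
test_cost, = exe.run(inference_program, feed=feed, fetch_list=[sum_cost])
print("train cost:", train_cost, "test cost:", test_cost)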
def train_net(args, pad_num=20, num_classes=100):
    from _8conv_baseline_char_fc_attdecode_small_open.cnn_ctc_model_senet_8conv_pfc_attdecode import encoder_net_attdecode_return_conv_features

    images = fluid.layers.data(name='img', shape=[1, 32, 256], dtype='float32')
    images = fluid.layers.cast(x=images, dtype='float32')
    mask = fluid.layers.data(name='mask', shape=[20, 16 * 128], dtype='float32')
    mask = fluid.layers.cast(x=mask, dtype='float32')
    # fluid.layers.Print(images, summarize=10, message="images")
    conv_features = encoder_net_attdecode_return_conv_features(images, word_vector_dim - 1, mask)
    # conv_features = fluid.layers.data(name='conv_feature', shape=[20, 64], dtype='float32')
    print("conv_features is ", conv_features)
    # content = fluid.layers.data(name='word', shape=[1], dtype='int32', lod_level=1)
    pos = fluid.layers.data(name="pos", shape=[pad_num, 4], append_batch_size=False, dtype='float32')
    pos = fluid.layers.cast(x=pos, dtype='float32')
    _tmp_pos = fluid.layers.unsqueeze(pos, axes=[0])
    _tmp_pos = fluid.layers.expand(_tmp_pos, expand_times=[pad_num, 1, 1])
    _tmp_pos_transp = fluid.layers.transpose(_tmp_pos, perm=[1, 0, 2])
    print("_tmp_pos is ", _tmp_pos)
    print("_tmp_pos_transp is ", _tmp_pos_transp)
    adjacency = _tmp_pos - _tmp_pos_transp
    # ???? fluid.layers.data(name='adja', shape=[pad_num, pad_num, 6], append_batch_size=False, dtype='float32')
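    # adjacency[i, j] = pos[j] - pos[i]: pairwise box offsets, so the last
    # dimension here is 4, while the commented-out 'adja' data layer above
    # declared a last dimension of 6.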
    node_label = fluid.layers.data(name='node_label', shape=[pad_num, 1], append_batch_size=False, dtype='int32')
    node_label = fluid.layers.cast(x=node_label, dtype='int64')
    node_mask = fluid.layers.data(name='node_mask', shape=[pad_num, 1], append_batch_size=False, dtype='int32')
    node_mask = fluid.layers.cast(x=node_mask, dtype='float32')
    edge_label = fluid.layers.data(name='edge_label', shape=[pad_num, pad_num], append_batch_size=False, dtype='int32')
    edge_label = fluid.layers.cast(x=edge_label, dtype='float32')
    edge_balance = fluid.layers.data(name='edge_mask', shape=[pad_num, pad_num], append_batch_size=False, dtype='float32')
    edge_balance = fluid.layers.cast(x=edge_balance, dtype='float32')
    tmp = fluid.layers.fill_constant([pad_num, pad_num], value=0.5, dtype='float32')
    edge_mask = fluid.layers.less_than(tmp, edge_balance)
    edge_mask = fluid.layers.cast(x=edge_mask, dtype='float32')
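    # less_than against the 0.5 constant binarizes edge_balance:
    # edge_mask[i, j] = 1.0 where edge_balance[i, j] > 0.5, else 0.0.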
    edge_out, node_out = encoder_net(conv_features, adjacency, node_mask, edge_mask, pad_num, num_classes)
    print('edge out is ', edge_out)
    print('node_out is ', node_out)
    print(" ok encoder ")
    node_cost = fluid.layers.softmax_with_cross_entropy(node_out, node_label)
    node_cost = fluid.layers.reduce_sum(node_cost * node_mask) / fluid.layers.reduce_sum(node_mask)
    # edge_cost = fluid.layers.sigmoid_cross_entropy_with_logits(edge_out, edge_label)
    edge_cost = focal_loss_sigmod(edge_out, edge_label)
    edge_cost = fluid.layers.reduce_sum(edge_cost * edge_balance)
    edge_cost *= args.lamb
    sum_cost = node_cost + edge_cost
    node_index = fluid.layers.argmax(node_out, axis=1)
    edge_score = fluid.layers.sigmoid(edge_out)
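    # focal_loss_sigmod is defined elsewhere in the project; a hedged
    # sketch of a typical sigmoid focal loss follows after this function.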
    inference_program = fluid.default_main_program().clone(for_test=True)
    if args.learning_rate_decay == "piecewise_decay":
        learning_rate = fluid.layers.piecewise_decay([args.stepvalue], [args.lr, args.lr * 0.1])
    else:
        learning_rate = args.lr
    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
    # optimizer = fluid.optimizer.Momentum(learning_rate=learning_rate, momentum=0.9)
    _, params_grads = optimizer.minimize(sum_cost)
    model_average = None
    if args.average_window > 0:
        model_average = fluid.optimizer.ModelAverage(
            args.average_window,
            min_average_window=args.min_average_window,
            max_average_window=args.max_average_window)
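    # Note: ModelAverage only swaps in the averaged weights inside a
    # `with model_average.apply(exe):` block at evaluation time; if the
    # test pass runs inside that block while training does not, the two
    # passes are evaluating different parameters.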
    return node_cost, edge_cost, inference_program, model_average, node_index, edge_score, sum_cost
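For reference, focal_loss_sigmod is not shown above; a common sigmoid focal-loss formulation that such a helper often implements looks like the sketch below (gamma, alpha, and the clipping epsilon are illustrative assumptions, not taken from the original code):

def focal_loss_sigmoid_sketch(logits, labels, gamma=2.0, alpha=0.25, eps=1e-7):
    # Hypothetical stand-in for focal_loss_sigmod; the project's real
    # implementation may differ. Expects float labels in {0, 1}.
    p = fluid.layers.sigmoid(logits)
    p = fluid.layers.clip(p, eps, 1.0 - eps)  # avoid log(0)
    pos_term = -alpha * fluid.layers.pow(1.0 - p, gamma) * fluid.layers.log(p) * labels
    neg_term = -(1.0 - alpha) * fluid.layers.pow(p, gamma) * fluid.layers.log(1.0 - p) * (1.0 - labels)
    return pos_term + neg_term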