模型整体结构类似于GAN,模型中的判别器模型已加载预训练好的model,在训练时对相同数据infer时每次执行结果都不一样
Created by: maosengshulei
1)PaddlePaddle版本: 1.8.1 2)CPU
模型整体结构类似于GAN,模型中的判别器已加载预训练好的参数,已检查最后一层fc的参数 和 预训练好的参数一致。
但是在模型训练时检查判别器infer的结果发现每次重新执行后预测结果都不一样
代码如下:
class GTrainer():
    """Generator trainer: builds the policy-gradient training program.

    The generator (PointerNet) produces sampled `actions` and their
    log-probabilities; the pretrained discriminator (DNN) scores both the
    original list and the re-ranked list. Only the generator ("Actor")
    parameters are optimized; discriminator rewards are treated as constants
    via stop_gradient, so the discriminator stays frozen.
    """

    def __init__(self, input, config, args, feat_list):
        # Clone the default main program so this trainer owns its own graph.
        self.program = fluid.default_main_program().clone()
        with fluid.program_guard(self.program):
            g_model = PointerNet(config)
            d_model = DNN()
            # Generator forward pass: sampled actions + their log-probs.
            self.actions, self.logp = g_model.learn(input)
            # Snapshot of the program up to the generator, for inference.
            self.infer_program = self.program.clone()
            # Discriminator reward for the original order vs. the re-ranked
            # order. NOTE(review): if DNN.net contains dropout/sampling ops,
            # they run in train mode here because the program is not cloned
            # with for_test=True — a likely source of the non-deterministic
            # `online_reward` reported for this model; verify DNN.net.
            self.online_reward = d_model.net(input, args, feat_list, [], rerank_flag=False)
            self.rerank_reward = d_model.net(input, args, feat_list, self.actions, rerank_flag=True)
            # Rewards come from the frozen discriminator; block gradients.
            self.online_reward.stop_gradient = True
            self.rerank_reward.stop_gradient = True
            # REINFORCE-style policy loss: maximize expected rerank reward.
            self.pi_loss = -1.0 * fluid.layers.reduce_mean(self.logp * self.rerank_reward)
            self.avg_reward = fluid.layers.reduce_mean(self.rerank_reward)
            # Collect only generator ("Actor") parameter names. Renamed from
            # `vars`, which shadowed the builtin vars().
            actor_param_names = [
                var.name
                for var in self.program.list_vars()
                if fluid.io.is_parameter(var) and var.name.startswith("Actor")
            ]
            print(actor_param_names)
            optimizer = fluid.optimizer.AdamOptimizer(1e-4)
            # Restrict updates to the generator parameters only.
            optimizer.minimize(self.pi_loss, parameter_list=actor_param_names)
class ListwiseRerankModel(object):
    """Listwise re-ranking model driver.

    Declares the feed placeholders from the feature list, builds the
    GTrainer program, loads pretrained (discriminator) parameters, and runs
    the training loop on CPU.
    """

    def __init__(self, config, args, feat_list, dataset, param_path):
        self.config = config
        self.feat_list = feat_list
        self.args = args
        self.dataset = dataset
        self.param_path = param_path

    def build_model(self):
        """Build inputs + trainer, load pretrained params, run training."""
        # Dense feature widths (one slot per dense feature) per node side.
        # sum(...) over a generator is robust to an empty feature list
        # (yields 0), unlike reduce(lambda x, y: x + y, []) which raises
        # TypeError.
        dense_feature_user_dim = sum(
            1 for _, feat in self.feat_list.items()
            if feat.value_type == 0 and feat.node_type == 'u')
        dense_feature_thread_dim = sum(
            1 for _, feat in self.feat_list.items()
            if feat.value_type == 0 and feat.node_type == 'i')

        dense_feature_user = fluid.data(
            name="dense_input_user",
            shape=[None, 1, dense_feature_user_dim], dtype='float32')
        dense_feature_thread = fluid.data(
            name="dense_input_thread",
            shape=[None, 1, dense_feature_thread_dim],
            lod_level=1, dtype='float32')

        # User-side sparse id features and weighted (index, value) pairs;
        # user side is a flat sequence (lod_level=1).
        sparse_feature_user = [
            fluid.data(name=feat.prefix, shape=[None, 1],
                       lod_level=1, dtype='int64')
            for _, feat in self.feat_list.items()
            if feat.value_type == 1 and feat.node_type == 'u']
        sparse_weight_feature_user = [
            fluid.data(name=feat.prefix + '@index', shape=[None, 1],
                       lod_level=1, dtype='int64')
            for _, feat in self.feat_list.items()
            if feat.value_type == 2 and feat.node_type == 'u']
        sparse_weight_value_user = [
            fluid.data(name=feat.prefix + '@value', shape=[None, 1],
                       lod_level=1, dtype='float32')
            for _, feat in self.feat_list.items()
            if feat.value_type == 2 and feat.node_type == 'u']

        # Item ("thread") side uses nested sequences (lod_level=2).
        sparse_feature_thread = [
            fluid.data(name=feat.prefix, shape=[None, 1],
                       lod_level=2, dtype='int64')
            for _, feat in self.feat_list.items()
            if feat.value_type == 1 and feat.node_type == 'i']
        sparse_weight_feature_thread = [
            fluid.data(name=feat.prefix + '@index', shape=[None, 1],
                       lod_level=2, dtype='int64')
            for _, feat in self.feat_list.items()
            if feat.value_type == 2 and feat.node_type == 'i']
        sparse_weight_value_thread = [
            fluid.data(name=feat.prefix + '@value', shape=[None, 1],
                       lod_level=2, dtype='float32')
            for _, feat in self.feat_list.items()
            if feat.value_type == 2 and feat.node_type == 'i']

        seq_len = fluid.data(name='seq_len', shape=[None, 1], dtype='int64')

        # Order must match what the data reader emits per sample.
        data_list = ([dense_feature_user, dense_feature_thread]
                     + sparse_feature_user
                     + sparse_weight_feature_user
                     + sparse_weight_value_user
                     + sparse_feature_thread
                     + sparse_weight_feature_thread
                     + sparse_weight_value_thread
                     + [seq_len])

        g_trainer = GTrainer(data_list, self.config, self.args, self.feat_list)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        # Randomly initializes ALL parameters; pretrained ones are then
        # overwritten by load_pretrained_params. NOTE(review): if
        # online_reward differs across identical runs, verify that
        # load_pretrained_params covers EVERY discriminator variable —
        # any variable it misses keeps a fresh random init each run.
        exe.run(fluid.default_startup_program())
        load_pretrained_params(exe, g_trainer.program, self.param_path)

        learn_data_loader = fluid.io.DataLoader.from_generator(
            capacity=64, feed_list=data_list, iterable=True)
        learn_data_loader.set_sample_list_generator(
            self.dataset.batch_reader(50, self.dataset.cluster_data_reader()),
            places=place)

        visualize_parameter('run_train_episode_before', 'all_fc.w_0')
        for pass_id in range(3):
            for data in learn_data_loader():
                # Fetch the discriminator's score on the original order.
                online_reward = exe.run(
                    g_trainer.program, feed=data,
                    fetch_list=[g_trainer.online_reward])[0]
                print(online_reward)
训练数据没有shuffle,因而每次重新训练时训练数据都是一致的。 代码中的online_reward对应判别器对训练数据的预测结果,但是每次重新训练的结果都不一致。