embedding_layer问题
Created by: Annnnnnnnnnnnn
paddle 版本:1.3,系统:centos,平台:gpu。模型描述:多任务学习,任务间共享同一个 embedding 矩阵,各任务的损失联合优化(目前仅为简单加和)。
class multi_task(BaseModel):
    """Multi-task learning demo.

    Builds one sub-network per task via ModelHelper; both are given the same
    word_space/embed_size, presumably so the embedding table is shared by
    parameter name inside ModelHelper -- TODO confirm. The per-task losses
    are jointly optimized (currently a plain sum).
    """

    def __init__(self, args):
        """
        Build the whole model: networks first, then the optimizer.

        :param args: configuration object (word_space, embed_size, lr, l2_reg,
                     batch_size, num_passes, use_cuda, train_dir, ...)
        """
        super(multi_task, self).__init__(args)
        self._create_model()
        self._create_opts()

    def _create_model(self):
        """
        Create the two task networks.

        NOTE(review): both tasks get identical word_space/embed_size; whether
        the embedding parameter is actually shared depends on how ModelHelper
        names its parameters -- verify in ModelHelper/Encoder.
        """
        logger.info("create networks")
        self.task_1st = ModelHelper(self.args.word_space, self.args.embed_size, self.args.is_training, name="task_1st")
        self.task_2nd = ModelHelper(self.args.word_space, self.args.embed_size, self.args.is_training, name="task_2nd")
        # Debug aid: dump the names of all parameters in the startup program.
        # for var in fluid.default_startup_program().list_vars():
        #     if fluid.io.is_parameter(var):
        #         print var.name

    def _create_opts(self):
        """
        Create the joint loss and optimizer.

        Joint loss is the plain sum of the per-task losses; Adam with an
        exponentially decayed learning rate and L2 weight decay.
        """
        # self.loss = reduce(lambda x, y: x + y, [self.task_1st.loss, self.task_2nd.loss, self.task_3rd.loss, self.task_4th.loss])
        self.loss = self.task_1st.loss + self.task_2nd.loss
        # Smooth (non-staircase) decay: lr * 0.9^(step/50).
        decay_lr = fluid.layers.exponential_decay(learning_rate=self.args.lr, decay_steps=50,
                                                  decay_rate=0.9, staircase=False)
        self.optim = fluid.optimizer.Adam(learning_rate=decay_lr,
                                          regularization=fluid.regularizer.L2Decay(regularization_coeff=self.args.l2_reg))
        self.optim.minimize(self.loss)

    def _create_summaries(self, is_batch, x, *y):
        """
        Collect metrics for visualization.

        :param is_batch: True -> per-batch loss curve(s); False -> per-epoch
                         train/val accuracy curves (docstring originally said
                         ``is_epoch``; the parameter is ``is_batch``)
        :param x: x-axis value (batch id or epoch id)
        :param y: one or more y-axis values matching the chosen mode
        """
        x_axis, y_axis = ({"x_name": "batch", "x_value": x}, [{"y_name": "loss", "y_value": val} for val in y]) if is_batch \
            else ({"x_name": "epoch", "x_value": x}, [{"y_name": tag, "y_value": val}
                                                     for tag, val in zip(["train_acc", "val_acc"], y)])
        visualdl.show_fluid_trend(x_axis, y_axis)

    def _train_loop(self, program):
        """
        Core training loop.

        Feeds each task through its own py_reader, runs the joint loss with a
        ParallelExecutor, and evaluates / checkpoints after every pass.

        :param program: the main (train) program to execute
        """
        self.exe.run(fluid.default_startup_program())
        # dataset = MultiReader(self.args.word_space)
        dataset_1st = Reader(self.args.word_space)
        self.args.train_dir = "./train_data" if self.args.cloud_train else self.args.train_dir
        train_reader_1st = paddle.batch(paddle.reader.shuffle(
            dataset_1st.feed(glob.glob(os.path.join(self.args.train_dir, "part-*"))),
            buf_size=self.args.batch_size * 100),
            batch_size=self.args.batch_size)
        self.task_1st.py_reader.decorate_paddle_reader(train_reader_1st)
        # NOTE(review): both tasks read the same part-* files from the same
        # train_dir -- confirm that is intended rather than per-task data.
        dataset_2nd = Reader(self.args.word_space)
        self.args.train_dir = "./train_data" if self.args.cloud_train else self.args.train_dir
        train_reader_2nd = paddle.batch(paddle.reader.shuffle(
            dataset_2nd.feed(glob.glob(os.path.join(self.args.train_dir, "part-*"))),
            buf_size=self.args.batch_size * 100),
            batch_size=self.args.batch_size)
        self.task_2nd.py_reader.decorate_paddle_reader(train_reader_2nd)
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        # Thread count: CPU_NUM env, else physical cores; NUM_THREADS overrides.
        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
        if os.getenv("NUM_THREADS", ""):
            cpu_num = int(os.getenv("NUM_THREADS"))
        exec_strategy.num_threads = cpu_num
        # NOTE(review): ReduceStrategy.Reduce shards parameter gradients across
        # devices; with an embedding table shared by two tasks this looks
        # related to the reported lookup_table_grad enforce failure
        # (d_table_value dims mismatch) -- try AllReduce and confirm.
        build_strategy.reduce_strategy = \
            fluid.BuildStrategy.ReduceStrategy.Reduce if cpu_num > 1 \
            else fluid.BuildStrategy.ReduceStrategy.AllReduce
        pe = fluid.ParallelExecutor(use_cuda=self.args.use_cuda,
                                    loss_name=self.loss.name,
                                    main_program=program,
                                    build_strategy=build_strategy,
                                    exec_strategy=exec_strategy)
        for pass_id in xrange(self.args.num_passes):
            batch_id = 0
            # py_readers must be (re)started every pass and reset on EOF.
            self.task_1st.py_reader.start()
            self.task_2nd.py_reader.start()
            pass_start_time = time.time()
            try:
                while True:
                    start_time = time.time()
                    loss, task_1st_acc, task_2nd_acc = \
                        pe.run(fetch_list=[self.loss.name,
                                           self.task_1st.acc.name,
                                           self.task_2nd.acc.name])
                    interval = time.time() - start_time
                    # Fetched values are per-device; average across devices.
                    loss = np.mean(loss)
                    # NOTE(review): only task_1st's accuracy is logged;
                    # task_2nd_acc is fetched but unused -- confirm intended.
                    acc = np.mean(task_1st_acc)
                    # logger.info("TRAIN --> pass: %2d, batch: %2d, time: %4.4f, loss: %.8f, acc: %.8f, sample_per_second: %2d" \
                    #     % (pass_id, batch_id, interval, loss, acc, self.args.batch_size * cpu_num / float(interval)))
                    logger.info("TRAIN --> pass: %2d, batch: %2d, time: %4.4f, loss: %.8f, acc: %.8f, sample_per_second: %2d" \
                                % (pass_id, batch_id, interval, loss, acc, self.args.batch_size * cpu_num / float(interval)))
                    batch_id += 1
            except fluid.core.EOFException:
                # End of pass: both readers hit EOF together; reset for next pass.
                self.task_1st.py_reader.reset()
                self.task_2nd.py_reader.reset()
            # Only the chief trainer writes checkpoints.
            if self.args.trainer_id == 0:
                # NOTE(review): this saves with `acc` from the LAST batch, and
                # the next line rebinds `acc` via `Acc.eval()` -- `Acc` is not
                # defined anywhere in this file; likely a bug or a name from
                # another module. Also, `acc` is unbound here if a pass yields
                # zero batches. Verify ordering and the `Acc` reference.
                self.save(program, self.args.checkpoint, pass_id, acc)
                acc = np.mean(Acc.eval())
                # NOTE(review): `eval(self.args)` shadows the builtin `eval`;
                # presumably a project-level evaluation helper -- confirm import.
                metric = eval(self.args)
                logger.info("[%2d/%d], train_Acc: %.8f, valid_Acc: %.8f, pass_time_cost: %4.4f" \
                            % (pass_id, self.args.num_passes, acc, metric[0], time.time() - pass_start_time))
                self.early_stopping(metric)
                if self.early_stopping.early_stop:
                    logger.info("Early Stopp, The best metric: %.8f, checkpoint: %s" % \
                                (self.early_stopping.best_score, self.early_stopping.tag))
                    break
                if self.args.cloud_train:
                    self._create_summaries(False, pass_id, acc, metric[0])
        self.exe.close()
Python Callstacks:
File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1317, in append_op
attrs=kwargs.get("attrs", None))
File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 56, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/layers/nn.py", line 364, in embedding
'padding_idx': padding_idx
File "/home/wanglongfei02/git/baidu/erised/lab/social/models/layers.py", line 53, in embed
param_attr=param)
File "/home/wanglongfei02/git/baidu/erised/lab/social/models/utils.py", line 196, in wrapper
return fn(*args, **kwargs)
File "/home/wanglongfei02/git/baidu/erised/lab/social/models/mtl/module.py", line 87, in Encoder
sparse_embed_seq = embed(sparse_feature, word_space, embed_size)
File "/home/wanglongfei02/git/baidu/erised/lab/social/models/utils.py", line 196, in wrapper
return fn(*args, **kwargs)
File "/home/wanglongfei02/git/baidu/erised/lab/social/models/mtl/module.py", line 46, in __init__
enc = Encoder(sparse_feature, word_space, embed_size, is_training, reuse=True, name="Encoder") # 编码阶段
File "model.py", line 65, in _create_model
self.args.l2_reg, self.args.is_training, name="task_3rd")
File "model.py", line 52, in __init__
self._create_model()
File "model.py", line 284, in <module>
demo = multi_task(args)
C++ Callstacks:
Enforce failed. Expected d_table_value->dims() == framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1), but received d_table_value->dims():423066, 32 != framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1):366697, 32.