Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • Paddle
  • Issue
  • #18321

P
Paddle
  • 项目概览

PaddlePaddle / Paddle
大约 2 年 前同步成功

通知 2325
Star 20933
Fork 5424
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 1423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
P
Paddle
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 1,423
    • Issue 1,423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
    • 合并请求 543
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 6月 25, 2019 by saxon_zh@saxon_zhGuest

embedding_layer问题

Created by: Annnnnnnnnnnnn

Paddle 版本:1.3;系统:CentOS;平台:GPU。模型描述:多任务学习,任务间共享一个 embedding 矩阵,各任务的损失联合优化(目前仅做简单加和)。

class multi_task(BaseModel):
    """Multi-task demo: two tasks optimized jointly on the sum of their losses.

    Both tasks are built by ``ModelHelper`` from the same ``word_space`` and
    ``embed_size`` settings.
    NOTE(review): the tasks are presumably meant to share one embedding
    table; that sharing happens inside ``ModelHelper`` and is not visible
    here -- confirm that every task uses the same vocabulary size.
    """

    def __init__(self, args):
        """Build the networks and then the optimizer.

        :param args: configuration object (forwarded to ``BaseModel``)
        """
        super(multi_task, self).__init__(args)
        self._create_model()
        self._create_opts()

    def _create_model(self):
        """Create one ``ModelHelper`` network per task."""
        logger.info("create networks")
        self.task_1st = ModelHelper(self.args.word_space, self.args.embed_size,
                                    self.args.is_training, name="task_1st")
        self.task_2nd = ModelHelper(self.args.word_space, self.args.embed_size,
                                    self.args.is_training, name="task_2nd")

        # Debugging aid: to list the created parameters, iterate over
        # fluid.default_startup_program().list_vars() and keep the vars for
        # which fluid.io.is_parameter(var) is true.

    def _create_opts(self):
        """Create the joint loss and the Adam optimizer that minimizes it."""
        # The joint objective is the plain sum of the per-task losses.
        self.loss = self.task_1st.loss + self.task_2nd.loss
        decay_lr = fluid.layers.exponential_decay(learning_rate=self.args.lr,
                                                  decay_steps=50,
                                                  decay_rate=0.9,
                                                  staircase=False)
        l2_decay = fluid.regularizer.L2Decay(regularization_coeff=self.args.l2_reg)
        self.optim = fluid.optimizer.Adam(learning_rate=decay_lr,
                                          regularization=l2_decay)
        self.optim.minimize(self.loss)

    def _create_summaries(self, is_batch, x, *y):
        """Send one data point per metric to visualdl.

        :param is_batch: truthy for per-batch reporting (every value in ``y``
            is labelled "loss"); falsy for per-epoch reporting (values in
            ``y`` are labelled "train_acc" and "val_acc", in that order;
            any further values are dropped by ``zip``)
        :param x: x-axis value (batch index or epoch index)
        :param y: y-axis values
        """
        if is_batch:
            x_axis = {"x_name": "batch", "x_value": x}
            y_axis = [{"y_name": "loss", "y_value": val} for val in y]
        else:
            x_axis = {"x_name": "epoch", "x_value": x}
            y_axis = [{"y_name": tag, "y_value": val}
                      for tag, val in zip(["train_acc", "val_acc"], y)]
        visualdl.show_fluid_trend(x_axis, y_axis)

    def _attach_train_reader(self, task):
        """Build a shuffled, batched reader over the training files and
        attach it to ``task.py_reader``.

        :param task: a task object exposing ``py_reader``
        """
        dataset = Reader(self.args.word_space)
        train_files = glob.glob(os.path.join(self.args.train_dir, "part-*"))
        batched_reader = paddle.batch(
            paddle.reader.shuffle(dataset.feed(train_files),
                                  buf_size=self.args.batch_size * 100),
            batch_size=self.args.batch_size)
        task.py_reader.decorate_paddle_reader(batched_reader)

    def _build_parallel_executor(self, program):
        """Create the ``ParallelExecutor`` used for training.

        :param program: main program
        :return: tuple ``(parallel_executor, cpu_num)``
        """
        # NUM_THREADS, when set and non-empty, overrides CPU_NUM / cpu_count().
        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
        if os.getenv("NUM_THREADS", ""):
            cpu_num = int(os.getenv("NUM_THREADS"))

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = cpu_num

        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = \
                fluid.BuildStrategy.ReduceStrategy.Reduce if cpu_num > 1 \
                else fluid.BuildStrategy.ReduceStrategy.AllReduce

        pe = fluid.ParallelExecutor(use_cuda=self.args.use_cuda,
                                    loss_name=self.loss.name,
                                    main_program=program,
                                    build_strategy=build_strategy,
                                    exec_strategy=exec_strategy)
        return pe, cpu_num

    def _run_passes(self, pe, program, cpu_num):
        """Run up to ``self.args.num_passes`` training passes.

        :param pe: the parallel executor to run
        :param program: main program (passed to ``self.save``)
        :param cpu_num: thread count, used only for the throughput log
        """
        # Fallback so the end-of-pass handler never hits an unbound ``acc``
        # when a reader is exhausted before the first batch completes.
        acc = 0.0

        for pass_id in xrange(self.args.num_passes):
            batch_id = 0

            self.task_1st.py_reader.start()
            self.task_2nd.py_reader.start()

            pass_start_time = time.time()
            try:
                # Runs until a py_reader is exhausted, signalled by EOFException.
                while True:
                    start_time = time.time()
                    loss, task_1st_acc, task_2nd_acc = \
                                      pe.run(fetch_list=[self.loss.name,
                                                 self.task_1st.acc.name,
                                                 self.task_2nd.acc.name])

                    interval = time.time() - start_time
                    loss = np.mean(loss)
                    # Only the first task's accuracy is reported;
                    # task_2nd_acc is fetched but not used.
                    acc = np.mean(task_1st_acc)
                    logger.info("TRAIN --> pass: %2d, batch: %2d, time: %4.4f, loss: %.8f, acc: %.8f, sample_per_second: %2d" \
                                    % (pass_id, batch_id, interval, loss, acc, self.args.batch_size * cpu_num / float(interval)))

                    batch_id += 1

            except fluid.core.EOFException:
                self.task_1st.py_reader.reset()
                self.task_2nd.py_reader.reset()

                if batch_id == 0:
                    logger.info("pass %d produced no training batch" % pass_id)

                if self.args.trainer_id == 0:
                    # The checkpoint is tagged with the last batch accuracy.
                    self.save(program, self.args.checkpoint, pass_id, acc)
                    acc = np.mean(Acc.eval())
                    # NOTE(review): ``eval`` is presumably a project-level
                    # evaluation function shadowing the builtin -- confirm.
                    metric = eval(self.args)

                    logger.info("[%2d/%d], train_Acc: %.8f, valid_Acc: %.8f, pass_time_cost: %4.4f" \
                    % (pass_id, self.args.num_passes, acc, metric[0], time.time() - pass_start_time))

                    self.early_stopping(metric)
                    if self.early_stopping.early_stop:
                        logger.info("Early Stopp, The best metric: %.8f, checkpoint: %s" % \
                             (self.early_stopping.best_score, self.early_stopping.tag))
                        break

                    if self.args.cloud_train:
                        self._create_summaries(False, pass_id, acc, metric[0])

    def _train_loop(self, program):
        """Core training logic: initialize, feed both tasks, train, clean up.

        :param program: main program
        """
        self.exe.run(fluid.default_startup_program())

        try:
            # Cloud training reads from a fixed local directory.
            self.args.train_dir = "./train_data" if self.args.cloud_train else self.args.train_dir
            for task in (self.task_1st, self.task_2nd):
                self._attach_train_reader(task)

            pe, cpu_num = self._build_parallel_executor(program)
            self._run_passes(pe, program, cpu_num)
        finally:
            # Close the executor even when training raises.
            self.exe.close()

Python Callstacks:
  File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1317, in append_op
    attrs=kwargs.get("attrs", None))
  File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 56, in append_op
    return self.main_program.current_block().append_op(*args, **kwargs)
  File "/home/wanglongfei02/.jumbo/lib/python2.7/site-packages/paddle/fluid/layers/nn.py", line 364, in embedding
    'padding_idx': padding_idx
  File "/home/wanglongfei02/git/baidu/erised/lab/social/models/layers.py", line 53, in embed
    param_attr=param)
  File "/home/wanglongfei02/git/baidu/erised/lab/social/models/utils.py", line 196, in wrapper
    return fn(*args, **kwargs)
  File "/home/wanglongfei02/git/baidu/erised/lab/social/models/mtl/module.py", line 87, in Encoder
    sparse_embed_seq = embed(sparse_feature, word_space, embed_size)
  File "/home/wanglongfei02/git/baidu/erised/lab/social/models/utils.py", line 196, in wrapper
    return fn(*args, **kwargs)
  File "/home/wanglongfei02/git/baidu/erised/lab/social/models/mtl/module.py", line 46, in __init__
    enc = Encoder(sparse_feature, word_space, embed_size, is_training, reuse=True, name="Encoder") # 编码阶段
  File "model.py", line 65, in _create_model
    self.args.l2_reg, self.args.is_training, name="task_3rd")
  File "model.py", line 52, in __init__
    self._create_model()
  File "model.py", line 284, in <module>
    demo = multi_task(args)
C++ Callstacks:
Enforce failed. Expected d_table_value->dims() == framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1), but received d_table_value->dims():423066, 32 != framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1):366697, 32.
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/Paddle#18321
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7