Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • models
  • Issue
  • #2004

M
models
  • 项目概览

PaddlePaddle / models
大约 2 年 前同步成功

通知 232
Star 6828
Fork 2962
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 602
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 255
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
M
models
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 602
    • Issue 602
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 255
    • 合并请求 255
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 4月 08, 2019 by saxon_zh@saxon_zhGuest

测试集精度太低

Created by: Annnnnnnnnnnnn

1、样本格式: 不定长输入(某用户搜过的词) 类似:[1, 3, 4, 5 ....] [label]
[1, 2, 3, .....] [label] 这个样子 2、处理方法: 利用LodTensor表征上述不定长输入(取对应位置的embedding池化(sum))转化成定长输入 3、paddle版本: 1.3 4、网络格式: 与deepctr中一致,三层全连接层 5、问题: 训练集精度稳定上升。测试精度一直(注意:是一直,测试精度没有下降的过程,貌似和过拟合不太像)不高。

class ctr_model(object):
    """CTR (click-through-rate) model wrapper around a PaddlePaddle fluid program.

    Builds the DNN network and Adam optimizer at construction time, then
    exposes ``train``/``save``/``load`` helpers.

    NOTE(review): written against the legacy fluid API (paddle ~1.3) and
    Python 2 (``xrange`` in ``train``); will not run unchanged on Python 3.
    """

    def __init__(self, args):
        """
        Build the whole model.
        :param args: parsed configuration object (embed_size,
                     sparse_feature_dim, lr, batch_size, num_passes,
                     checkpoint, train_dir, trainer_id, is_training, ...)
        """
        self.args = args
        place = fluid.CPUPlace()
        # place = fluid.CUDAPlace(0)
        self.exe = fluid.Executor(place)
        self.main_program = fluid.default_main_program()

        self._create_model()
        # self._create_opts()

    def _create_model(self):
        """
        Build the network and attach the Adam optimizer to it.
        """
        logger.info("create networks")

        with fluid.program_guard(self.main_program):
            with fluid.unique_name.guard():
                self.py_reader, self.words, self.predict, self.loss, self.acc = \
                                        ctr_dnn_model(self.args.embed_size, self.args.sparse_feature_dim,
                                        is_training=self.args.is_training)

                logger.info("create optimizer")
                self.optimizer = fluid.optimizer.Adam(learning_rate=self.args.lr)
                self.optimizer.minimize(self.loss)

    def _create_opts(self):
        """
        Build the optimizer.

        NOTE(review): dead code — ``_create_model`` already creates the
        optimizer, and the call to this method in ``__init__`` is
        commented out.
        """
        logger.info("create optimizer")
        self.optimizer = fluid.optimizer.Adam(learning_rate=self.args.lr)
        self.optimizer.minimize(self.loss)

    def train(self):
        """
        Run the local training loop for ``args.num_passes`` epochs,
        saving an inference model every 100 batches and at the end of
        each pass (trainer 0 only).
        """
        logger.info("run local training")

        # print [p.name for p in self.main_program.global_block().all_parameters()]

        # Must run the startup program BEFORE loading any checkpoint;
        # running it afterwards would overwrite already-loaded parameters.
        self.exe.run(fluid.default_startup_program())

        if not os.path.exists(self.args.checkpoint):
            os.makedirs(self.args.checkpoint)

        dataset = Reader(self.args.sparse_feature_dim)

        # Shuffled, batched reader over all "part-*" shards in train_dir.
        train_reader = paddle.batch(paddle.reader.shuffle(dataset.feed(glob.glob(os.path.join(self.args.train_dir, "part-*"))),
                                     buf_size=self.args.batch_size * 100), batch_size=self.args.batch_size)

        self.py_reader.decorate_paddle_reader(train_reader)

        feed_list = []
        fetch_list = [self.words[1], self.predict, self.loss, self.acc]
        # place = fluid.CPUPlace()
        # exe = fluid.Executor(place)

        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()

        if os.getenv("NUM_THREADS", ""):
            exec_strategy.num_threads = int(os.getenv("NUM_THREADS"))

        # Reduce strategy for multi-CPU data parallelism.
        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
        build_strategy.reduce_strategy = \
                fluid.BuildStrategy.ReduceStrategy.Reduce if cpu_num > 1 \
                else fluid.BuildStrategy.ReduceStrategy.AllReduce


        self.compiled_program = fluid.CompiledProgram(self.main_program) \
                            .with_data_parallel(loss_name=self.loss.name,
                                                build_strategy=build_strategy,
                                                exec_strategy=exec_strategy)

        metric = fluid.metrics.Auc(name="train_auc")
        # NOTE(review): ``xrange`` is Python 2 only; use ``range`` on Python 3.
        for pass_id in xrange(self.args.num_passes):
            pass_start = time.time()
            self.py_reader.start()
            batch_id = 0
            try:
                # Loop until the py_reader raises EOFException at end of data.
                while True:
                    label, predict, loss, acc = self.exe.run(self.compiled_program,
                                                fetch_list=[self.words[1].name,
                                                self.predict.name,
                                                self.loss.name,
                                                self.acc.name])
                    loss = np.mean(loss)
                    acc = np.mean(acc)
                    metric.update(predict, label)
                    auc = metric.eval()
                    logger.info("TRAIN --> pass: {} batch: {:0=4} loss: {} acc: {}  auc: {}"
                            .format(pass_id, batch_id, loss, acc, auc))

                    if batch_id % 100 == 0 and batch_id != 0:

                        if self.args.trainer_id == 0:
                            self.save(self.args.checkpoint, feed_list, fetch_list, pass_id, batch_id)

                    batch_id += 1

            except fluid.core.EOFException:
                # End of the pass: reset the reader for the next epoch.
                self.py_reader.reset()

            print("pass_id: %d, pass_time_cost: %f" % (pass_id, time.time() - pass_start))

            if self.args.trainer_id == 0:
                self.save(self.args.checkpoint, feed_list, fetch_list, pass_id, batch_id)
                pass

    def save(self, path, feed_list, fetch_list, pass_id, batch_id):
        """
        Save an inference-model checkpoint.
        :param path: output directory
        :param feed_list: feed variable names for the inference model
        :param fetch_list: fetch targets for the inference model
        :param pass_id: epoch index (embedded in the checkpoint name)
        :param batch_id: iteration index within the epoch
        """
        checkpoint_name = "ctr_model.ckpt-%04d-%04d" % (pass_id, batch_id)
        model_dir = os.path.join(path, checkpoint_name)
        # fluid.io.save_persistables(self.exe, model_dir, self.main_program)
        fluid.io.save_inference_model(model_dir, feed_list, fetch_list, self.exe, self.main_program)

    def load(self, path):
        """
        Load the latest persistables checkpoint, if any.
        :param path: checkpoint directory
        :return: True if a checkpoint was found and loaded, else False

        NOTE(review): sorts by the last 4 characters of the filename only
        (the batch id), while ``eval`` below sorts on the last 9 — the
        two selection rules can disagree about which checkpoint is newest.
        """
        logger.info("[*] Reading checkpoint...")
        models = glob.glob(os.path.join(path, "*.ckpt-*"))
        if models:
            models = sorted(models, key=lambda x: x[-4:])
            fluid.io.load_persistables(self.exe, models[-1], self.main_program)
            batch_id = int(models[-1][-4:])  # NOTE(review): computed but unused
            return True

        else:
            return False


if __name__ == "__main__":
    # Parse the configuration, then build and train the CTR model.
    config = Config()()
    ctr_model(config).train()

验证:
# python
def eval():
    """
    Evaluation loop: repeatedly load the newest inference-model
    checkpoint and run it over the whole test set, logging loss/acc/AUC.
    Runs forever, sleeping EVAL_INTERVAL_SECS between rounds.

    NOTE(review): shadows the ``eval`` builtin; consider renaming at the
    definition and call sites together.
    """
    args = Config()
    args = args()

    place = fluid.CPUPlace()
    inference_scope = fluid.core.Scope()

    dataset = Reader(args.sparse_feature_dim)
    test_reader = paddle.batch(dataset.feed(glob.glob(os.path.join(args.test_dir, "part-*"))),
                                    batch_size=args.batch_size*100)

    # Build the network once (inference mode) to obtain the feed vars.
    startup_program = fluid.framework.Program()
    main_program = fluid.framework.Program()
    with fluid.framework.program_guard(main_program, startup_program):
        _, words, predict, loss, acc = ctr_dnn_model(args.embed_size, args.sparse_feature_dim, is_training=False)

    feeder = fluid.DataFeeder(feed_list=words, place=place)
    exe = fluid.Executor(place)
    # NOTE(review): the Auc metric is created once outside the loop and is
    # never reset (set_zero calls below are commented out), so reported AUC
    # accumulates across checkpoint reloads — verify this is intended.
    metric = fluid.metrics.Auc(name="valid_auc")
    while True:
        logger.info("[*] Reading checkpoint...")
        ckpt = glob.glob(os.path.join(args.checkpoint, "*.ckpt-*"))

        if ckpt:
            logger.info("[*] Load Success...")
            # Newest checkpoint by the trailing "-PPPP-BBBB" suffix (9 chars).
            ckpt = sorted(ckpt, key=lambda x: x[-9:])[-1]
            logger.info(ckpt)
            with fluid.scope_guard(inference_scope):
                [inference_program, _, fetch_targets] = fluid.io.load_inference_model(ckpt, exe)

                def set_zero(var_name):
                    """set auc state list to 0"""
                    param = inference_scope.var(var_name).get_tensor()
                    param_array = np.zeros(param._get_dims()).astype("int64")
                    param.set(param_array, place)

                auc_states_names = ['_generated_var_2', '_generated_var_3']
                # for name in auc_states_names:
                #     set_zero(name)

                for batch_id, data in enumerate(test_reader()):
                    label, predict, loss, acc = exe.run(inference_program,
                                                        feed=feeder.feed(data),
                                                        fetch_list=fetch_targets)
                    loss = np.mean(loss)
                    acc = np.mean(acc)
                    metric.update(predict, label)
                    auc = metric.eval()
                    # if batch_id % 100 == 0:
                    logger.info("TEST --> batch: {} loss: {} acc: {} auc: {}".format(batch_id, loss, acc, auc))
        else:
            logger.info("No checkpoint file found")

        time.sleep(EVAL_INTERVAL_SECS)


if __name__ == "__main__":
    # Entry point: start the checkpoint-polling evaluation loop.
    eval()
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/models#2004
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7