diff --git a/models/rank/logistic_regression/config.yaml b/models/rank/logistic_regression/config.yaml index 7052ba27ae09b867535e1adae0db26b89dd97d9d..2d8bc786cd3966e8c1fb4f53e997cec12f0da890 100644 --- a/models/rank/logistic_regression/config.yaml +++ b/models/rank/logistic_regression/config.yaml @@ -19,13 +19,13 @@ workspace: "models/rank/logistic_regression" dataset: - name: train_sample - type: QueueDataset + type: DataLoader batch_size: 5 data_path: "{workspace}/data/sample_data/train" sparse_slots: "label feat_idx" dense_slots: "feat_value:39" - name: infer_sample - type: QueueDataset + type: DataLoader batch_size: 5 data_path: "{workspace}/data/sample_data/train" sparse_slots: "label feat_idx" @@ -33,15 +33,15 @@ dataset: hyper_parameters: optimizer: - class: SGD - learning_rate: 0.0001 + class: Adam + learning_rate: 0.001 sparse_feature_number: 1086460 sparse_feature_dim: 9 num_field: 39 reg: 0.001 -mode: train_runner +mode: [train_runner,infer_runner] # if infer, change mode to "infer_runner" and change phase to "infer_phase" runner: @@ -58,7 +58,7 @@ runner: - name: infer_runner class: infer device: cpu - init_model_path: "increment/0" + init_model_path: "increment/1" print_interval: 1 @@ -66,8 +66,8 @@ phase: - name: phase1 model: "{workspace}/model.py" dataset_name: train_sample - thread_num: 1 -#- name: infer_phase -# model: "{workspace}/model.py" -# dataset_name: infer_sample -# thread_num: 1 + thread_num: 10 +- name: infer_phase + model: "{workspace}/model.py" + dataset_name: infer_sample + thread_num: 10 diff --git a/models/rank/logistic_regression/readme.md b/models/rank/logistic_regression/readme.md index 226667c535f6318350119ba93113a02116c5c5ef..2e348e489cc4f12990727edd931e1536900e5231 100644 --- a/models/rank/logistic_regression/readme.md +++ b/models/rank/logistic_regression/readme.md @@ -1,6 +1,37 @@ # 基于logistic_regression模型的点击率预估模型 -## 介绍 +以下是本例的简要目录结构及说明: + +``` +├── sample_data #样例数据 + ├── train + ├── sample_train.txt #训练数据样例 + ├── 
preprocess.py #数据处理程序 + ├── run.sh #数据一键处理脚本 + ├── download_preprocess.py #数据下载脚本 + ├── get_slot_data.py #格式整理程序 +├── __init__.py +├── readme.md #文档 +├── model.py #模型文件 +├── config.yaml #配置文件 +``` + +注:在阅读该示例前,建议您先了解以下内容: + +[PaddleRec入门教程](https://github.com/PaddlePaddle/PaddleRec/blob/master/README.md) + +## 内容 + +- [模型简介](#模型简介) +- [数据准备](#数据准备) +- [运行环境](#运行环境) +- [快速开始](#快速开始) +- [模型组网](#模型组网) +- [效果复现](#效果复现) +- [进阶使用](#进阶使用) +- [FAQ](#FAQ) + +## 模型简介 `CTR(Click Through Rate)`,即点击率,是“推荐系统/计算广告”等领域的重要指标,对其进行预估是商品推送/广告投放等决策的基础。简单来说,CTR预估对每次广告的点击情况做出预测,预测用户是点击还是不点击。CTR预估模型综合考虑各种因素、特征,在大量历史数据上训练,最终对商业决策提供帮助。本模型实现了下述论文中的logistic_regression模型: ```text @@ -20,12 +51,8 @@ ```bash