提交 f39c93e6 编写于 作者: Z zhoushiyu 提交者: Thunderbrook

PaddleRec api update in release 1.6 (#3554)

* PaddleRec release 1.6 api update

* Update README.md

* Update README.md

bold remind of paddle version

* Update README.md

change paddle version note
上级 78e6a016
......@@ -10,6 +10,7 @@
├── network.py # 网络结构
├── config.py # 参数配置
├── reader.py # 读取数据相关的函数
├── utils.py # 通用函数
├── data/
├── download.sh # 下载数据脚本
├── preprocess.py # 数据预处理脚本
......@@ -23,7 +24,7 @@
DCN模型介绍可以参阅论文[Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123)
## 环境
- PaddlePaddle 1.6
- **目前模型库下模型均要求使用PaddlePaddle 1.6及以上版本或适当的develop版本**
## 数据下载
......
......@@ -7,6 +7,7 @@ from collections import OrderedDict
import paddle.fluid as fluid
from network import DCN
import utils
def parse_args():
......@@ -194,4 +195,5 @@ def train():
if __name__ == "__main__":
utils.check_version()
train()
......@@ -16,6 +16,7 @@ from config import parse_args
from reader import CriteoDataset
from network import DCN
from collections import OrderedDict
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid')
......@@ -94,4 +95,5 @@ def infer():
if __name__ == '__main__':
utils.check_version()
infer()
......@@ -11,6 +11,7 @@ import paddle.fluid as fluid
from config import parse_args
from network import DCN
import utils
"""
train DCN model
"""
......@@ -86,4 +87,5 @@ def train(args):
if __name__ == '__main__':
args = parse_args()
print(args)
utils.check_version()
train(args)
......@@ -40,13 +40,13 @@ class DCN(object):
def build_network(self, is_test=False):
# data input
self.target_input = fluid.layers.data(
name='label', shape=[1], dtype='float32')
self.target_input = fluid.data(
name='label', shape=[None, 1], dtype='float32')
data_dict = OrderedDict()
for feat_name in self.feat_dims_dict:
data_dict[feat_name] = fluid.layers.data(
name=feat_name, shape=[1], dtype='float32')
data_dict[feat_name] = fluid.data(
name=feat_name, shape=[None, 1], dtype='float32')
self.net_input = self._create_embedding_input(data_dict)
......@@ -120,7 +120,7 @@ class DCN(object):
def _create_embedding_input(self, data_dict):
# sparse embedding
sparse_emb_dict = OrderedDict((name, fluid.layers.embedding(
sparse_emb_dict = OrderedDict((name, fluid.embedding(
input=fluid.layers.cast(
data_dict[name], dtype='int64'),
size=[
......
import sys
import paddle.fluid as fluid
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is not
    acceptable.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    error message is written through the module-level ``logger`` and the
    process is terminated.

    Raises:
        SystemExit: with exit status 1 when the version check raises.
    """
    # Parenthesised implicit concatenation: there is no trailing backslash
    # that could accidentally join the message with the next statement.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # The exception type raised by require_version is not visible here,
        # so the broad catch is kept; every failure is reported the same way.
        logger.error(err)
        sys.exit(1)
......@@ -15,7 +15,7 @@ This model implementation reproduces the result of the paper "DeepFM: A Factoriz
```
## Environment
- PaddlePaddle 1.6
- **All models in PaddleRec now require PaddlePaddle version 1.6 or higher, or a suitable develop version.**
## Download and preprocess data
......@@ -80,7 +80,7 @@ other params explained in cluster_train.py
Infer
```bash
python infer.py --model_output_dir cluster_model --test_epoch 10 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2'
python infer.py --model_output_dir cluster_model --test_epoch 10 --num_feat 141443 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2'
```
Notes:
......
......@@ -5,6 +5,7 @@ import time
from network_conf import ctr_deepfm_model
import paddle.fluid as fluid
import utils
def parse_args():
......@@ -153,7 +154,7 @@ def train():
dataset=dataset,
fetch_list=[loss],
fetch_info=['epoch %d batch loss' % (epoch_id + 1)],
print_period=20,
print_period=5,
debug=False)
model_dir = args.model_output_dir + '/epoch_' + str(epoch_id + 1)
sys.stderr.write('epoch%d is finished and takes %f s\n' % (
......@@ -188,4 +189,5 @@ def train():
if __name__ == "__main__":
utils.check_version()
train()
......@@ -11,6 +11,7 @@ import paddle.fluid as fluid
from args import parse_args
from criteo_reader import CriteoDataset
from network_conf import ctr_deepfm_model
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid')
......@@ -71,4 +72,5 @@ def infer():
if __name__ == '__main__':
utils.check_version()
infer()
......@@ -6,6 +6,7 @@ from network_conf import ctr_deepfm_model
import time
import numpy
import pickle
import utils
def train():
......@@ -59,4 +60,5 @@ def train():
if __name__ == '__main__':
utils.check_version()
train()
......@@ -11,12 +11,12 @@ def ctr_deepfm_model(embedding_size,
is_sparse=False):
init_value_ = 0.1
raw_feat_idx = fluid.layers.data(
name='feat_idx', shape=[num_field], dtype='int64')
raw_feat_value = fluid.layers.data(
name='feat_value', shape=[num_field], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='float32') # None * 1
raw_feat_idx = fluid.data(
name='feat_idx', shape=[None, num_field], dtype='int64')
raw_feat_value = fluid.data(
name='feat_value', shape=[None, num_field], dtype='float32')
label = fluid.data(
name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx,
[-1, 1]) # (None * num_field) * 1
......@@ -25,7 +25,7 @@ def ctr_deepfm_model(embedding_size,
# -------------------- first order term --------------------
first_weights_re = fluid.layers.embedding(
first_weights_re = fluid.embedding(
input=feat_idx,
is_sparse=is_sparse,
dtype='float32',
......@@ -41,7 +41,7 @@ def ctr_deepfm_model(embedding_size,
# -------------------- second order term --------------------
feat_embeddings_re = fluid.layers.embedding(
feat_embeddings_re = fluid.embedding(
input=feat_idx,
is_sparse=is_sparse,
dtype='float32',
......
import sys
import paddle.fluid as fluid
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is not
    acceptable.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    error message is written through the module-level ``logger`` and the
    process is terminated.

    Raises:
        SystemExit: with exit status 1 when the version check raises.
    """
    # Parenthesised implicit concatenation: there is no trailing backslash
    # that could accidentally join the message with the next statement.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # The exception type raised by require_version is not visible here,
        # so the broad catch is kept; every failure is reported the same way.
        logger.error(err)
        sys.exit(1)
......@@ -15,6 +15,7 @@
```
## 运行环境
**要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。**
需要先安装PaddlePaddle Fluid,然后运行:
```shell
......
......@@ -20,6 +20,7 @@ factorization machines, please refer to the paper [factorization
machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
## Environment
**All models in PaddleRec now require PaddlePaddle version 1.6 or higher, or a suitable develop version.**
You should install PaddlePaddle Fluid first, and run:
```shell
......
......@@ -10,6 +10,7 @@ import paddle.fluid as fluid
import reader
from network_conf import ctr_dnn_model
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
......@@ -91,4 +92,5 @@ def infer():
if __name__ == '__main__':
utils.check_version()
infer()
......@@ -31,20 +31,22 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
"""
sparse_fm_layer
"""
first_embeddings = fluid.layers.embedding(
first_embeddings = fluid.embedding(
input=input,
dtype='float32',
size=[emb_dict_size, 1],
is_sparse=True)
first_embeddings = fluid.layers.squeeze(input=first_embeddings, axes=[1])
first_order = fluid.layers.sequence_pool(
input=first_embeddings, pool_type='sum')
nonzero_embeddings = fluid.layers.embedding(
nonzero_embeddings = fluid.embedding(
input=input,
dtype='float32',
size=[emb_dict_size, factor_size],
param_attr=fm_param_attr,
is_sparse=True)
nonzero_embeddings = fluid.layers.squeeze(input=nonzero_embeddings, axes=[1])
summed_features_emb = fluid.layers.sequence_pool(
input=nonzero_embeddings, pool_type='sum')
summed_features_emb_square = fluid.layers.square(summed_features_emb)
......@@ -57,8 +59,8 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
summed_features_emb_square - squared_sum_features_emb)
return first_order, second_order
dense_input = fluid.layers.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32')
dense_input = fluid.data(
name="dense_input", shape=[None, dense_feature_dim], dtype='float32')
sparse_input_ids = [
fluid.layers.data(
......@@ -66,7 +68,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
for i in range(1, 27)
]
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
datas = [dense_input] + sparse_input_ids + [label]
......@@ -96,6 +98,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
size=[sparse_feature_dim, factor_size],
param_attr=sparse_fm_param_attr,
is_sparse=True)
emb = fluid.layers.squeeze(input=emb, axes=[1])
return fluid.layers.sequence_pool(input=emb, pool_type='average')
sparse_embed_seq = list(map(embedding_layer, sparse_input_ids))
......@@ -139,7 +142,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
def embedding_layer(input):
"""embedding_layer"""
emb = fluid.layers.embedding(
emb = fluid.embedding(
input=input,
is_sparse=True,
# you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
......@@ -149,18 +152,19 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()))
emb = fluid.layers.squeeze(input=emb, axes=[1])
return fluid.layers.sequence_pool(input=emb, pool_type='average')
dense_input = fluid.layers.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32')
dense_input = fluid.data(
name="dense_input", shape=[None, dense_feature_dim], dtype='float32')
sparse_input_ids = [
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype='int64')
fluid.data(
name="C" + str(i), shape=[None, 1], lod_level=1, dtype='int64')
for i in range(1, 27)
]
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
words = [dense_input] + sparse_input_ids + [label]
......
......@@ -13,6 +13,7 @@ import paddle.fluid as fluid
import reader
from network_conf import ctr_dnn_model
from multiprocessing import cpu_count
import utils
# disable gpu training for this example
os.environ["CUDA_VISIBLE_DEVICES"] = ""
......@@ -269,4 +270,5 @@ def get_cards(args):
if __name__ == '__main__':
utils.check_version()
train()
import sys
import paddle.fluid as fluid
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is not
    acceptable.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    error message is written through the module-level ``logger`` and the
    process is terminated.

    Raises:
        SystemExit: with exit status 1 when the version check raises.
    """
    # Parenthesised implicit concatenation: there is no trailing backslash
    # that could accidentally join the message with the next statement.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # The exception type raised by require_version is not visible here,
        # so the broad catch is kept; every failure is reported the same way.
        logger.error(err)
        sys.exit(1)
......@@ -12,7 +12,7 @@ sh download.sh
```
## 环境
- PaddlePaddle 1.6
- **要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。**
## 单机训练
```bash
......
......@@ -5,6 +5,7 @@ import time
import network_conf
import paddle.fluid as fluid
import utils
def parse_args():
......@@ -193,4 +194,5 @@ def train():
if __name__ == "__main__":
utils.check_version()
train()
......@@ -8,6 +8,7 @@ import paddle.fluid as fluid
from args import parse_args
from criteo_reader import CriteoDataset
import network_conf
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid')
......@@ -72,4 +73,5 @@ def infer():
if __name__ == '__main__':
utils.check_version()
infer()
......@@ -56,4 +56,5 @@ def train():
if __name__ == '__main__':
utils.check_version()
train()
......@@ -14,18 +14,18 @@ def ctr_xdeepfm_model(embedding_size,
initer = fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_)
raw_feat_idx = fluid.layers.data(
name='feat_idx', shape=[num_field], dtype='int64')
raw_feat_value = fluid.layers.data(
name='feat_value', shape=[num_field], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='float32') # None * 1
raw_feat_idx = fluid.data(
name='feat_idx', shape=[None, num_field], dtype='int64')
raw_feat_value = fluid.data(
name='feat_value', shape=[None, num_field], dtype='float32')
label = fluid.data(
name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx,
[-1, 1]) # (None * num_field) * 1
feat_value = fluid.layers.reshape(
raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
feat_embeddings = fluid.layers.embedding(
feat_embeddings = fluid.embedding(
input=feat_idx,
is_sparse=is_sparse,
dtype='float32',
......@@ -39,7 +39,7 @@ def ctr_xdeepfm_model(embedding_size,
# -------------------- linear --------------------
weights_linear = fluid.layers.embedding(
weights_linear = fluid.embedding(
input=feat_idx,
is_sparse=is_sparse,
dtype='float32',
......
import sys
import paddle.fluid as fluid
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is not
    acceptable.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    error message is written through the module-level ``logger`` and the
    process is terminated.

    Raises:
        SystemExit: with exit status 1 when the version check raises.
    """
    # Parenthesised implicit concatenation: there is no trailing backslash
    # that could accidentally join the message with the next statement.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # The exception type raised by require_version is not visible here,
        # so the broad catch is kept; every failure is reported the same way.
        logger.error(err)
        sys.exit(1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册