提交 f39c93e6 编写于 作者: Z zhoushiyu 提交者: Thunderbrook

PaddleRec api update in release 1.6 (#3554)

* PaddleRec release 1.6 api update

* Update README.md

* Update README.md

bold remind of paddle version

* Update README.md

change paddle version note
上级 78e6a016
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
├── network.py # 网络结构 ├── network.py # 网络结构
├── config.py # 参数配置 ├── config.py # 参数配置
├── reader.py # 读取数据相关的函数 ├── reader.py # 读取数据相关的函数
├── utils.py # 通用函数
├── data/ ├── data/
├── download.sh # 下载数据脚本 ├── download.sh # 下载数据脚本
├── preprocess.py # 数据预处理脚本 ├── preprocess.py # 数据预处理脚本
...@@ -23,7 +24,7 @@ ...@@ -23,7 +24,7 @@
DCN模型介绍可以参阅论文[Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123) DCN模型介绍可以参阅论文[Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123)
## 环境 ## 环境
- PaddlePaddle 1.6 - **目前模型库下模型均要求使用PaddlePaddle 1.6及以上版本或适当的develop版本**
## 数据下载 ## 数据下载
......
...@@ -7,6 +7,7 @@ from collections import OrderedDict ...@@ -7,6 +7,7 @@ from collections import OrderedDict
import paddle.fluid as fluid import paddle.fluid as fluid
from network import DCN from network import DCN
import utils
def parse_args(): def parse_args():
...@@ -194,4 +195,5 @@ def train(): ...@@ -194,4 +195,5 @@ def train():
if __name__ == "__main__": if __name__ == "__main__":
utils.check_version()
train() train()
...@@ -16,6 +16,7 @@ from config import parse_args ...@@ -16,6 +16,7 @@ from config import parse_args
from reader import CriteoDataset from reader import CriteoDataset
from network import DCN from network import DCN
from collections import OrderedDict from collections import OrderedDict
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid') logger = logging.getLogger('fluid')
...@@ -94,4 +95,5 @@ def infer(): ...@@ -94,4 +95,5 @@ def infer():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
infer() infer()
...@@ -11,6 +11,7 @@ import paddle.fluid as fluid ...@@ -11,6 +11,7 @@ import paddle.fluid as fluid
from config import parse_args from config import parse_args
from network import DCN from network import DCN
import utils
""" """
train DCN model train DCN model
""" """
...@@ -86,4 +87,5 @@ def train(args): ...@@ -86,4 +87,5 @@ def train(args):
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
print(args) print(args)
utils.check_version()
train(args) train(args)
...@@ -40,13 +40,13 @@ class DCN(object): ...@@ -40,13 +40,13 @@ class DCN(object):
def build_network(self, is_test=False): def build_network(self, is_test=False):
# data input # data input
self.target_input = fluid.layers.data( self.target_input = fluid.data(
name='label', shape=[1], dtype='float32') name='label', shape=[None, 1], dtype='float32')
data_dict = OrderedDict() data_dict = OrderedDict()
for feat_name in self.feat_dims_dict: for feat_name in self.feat_dims_dict:
data_dict[feat_name] = fluid.layers.data( data_dict[feat_name] = fluid.data(
name=feat_name, shape=[1], dtype='float32') name=feat_name, shape=[None, 1], dtype='float32')
self.net_input = self._create_embedding_input(data_dict) self.net_input = self._create_embedding_input(data_dict)
...@@ -120,7 +120,7 @@ class DCN(object): ...@@ -120,7 +120,7 @@ class DCN(object):
def _create_embedding_input(self, data_dict): def _create_embedding_input(self, data_dict):
# sparse embedding # sparse embedding
sparse_emb_dict = OrderedDict((name, fluid.layers.embedding( sparse_emb_dict = OrderedDict((name, fluid.embedding(
input=fluid.layers.cast( input=fluid.layers.cast(
data_dict[name], dtype='int64'), data_dict[name], dtype='int64'),
size=[ size=[
......
import sys
import paddle.fluid as fluid
import logging
# Set up the root logger with the logging module's default configuration so
# that messages emitted from this module are actually shown.
logging.basicConfig()
# Module-level logger used by check_version() to report a failed check.
logger = logging.getLogger(__name__)
# Only check_version is exported by `from utils import *`.
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is too old.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    failure is reported through the module logger and the process is
    terminated with exit status 1. Returns ``None`` when the check passes.

    Raises:
        SystemExit: when the version check fails.
    """
    # Parenthesised implicit concatenation: the previous form ended the last
    # fragment with a stray line-continuation backslash, which glued the
    # assignment onto whatever physical line happened to follow it.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # Deliberately broad: any failure of the version check is treated as
        # "requirement not met" and aborts the script.
        logger.error(err)
        sys.exit(1)
...@@ -15,7 +15,7 @@ This model implementation reproduces the result of the paper "DeepFM: A Factoriz ...@@ -15,7 +15,7 @@ This model implementation reproduces the result of the paper "DeepFM: A Factoriz
``` ```
## Environment ## Environment
- PaddlePaddle 1.6 - **Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or a suitable develop version.**
## Download and preprocess data ## Download and preprocess data
...@@ -80,7 +80,7 @@ other params explained in cluster_train.py ...@@ -80,7 +80,7 @@ other params explained in cluster_train.py
Infer Infer
```bash ```bash
python infer.py --model_output_dir cluster_model --test_epoch 10 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2' python infer.py --model_output_dir cluster_model --test_epoch 10 --num_feat 141443 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2'
``` ```
Notes: Notes:
......
...@@ -5,6 +5,7 @@ import time ...@@ -5,6 +5,7 @@ import time
from network_conf import ctr_deepfm_model from network_conf import ctr_deepfm_model
import paddle.fluid as fluid import paddle.fluid as fluid
import utils
def parse_args(): def parse_args():
...@@ -153,7 +154,7 @@ def train(): ...@@ -153,7 +154,7 @@ def train():
dataset=dataset, dataset=dataset,
fetch_list=[loss], fetch_list=[loss],
fetch_info=['epoch %d batch loss' % (epoch_id + 1)], fetch_info=['epoch %d batch loss' % (epoch_id + 1)],
print_period=20, print_period=5,
debug=False) debug=False)
model_dir = args.model_output_dir + '/epoch_' + str(epoch_id + 1) model_dir = args.model_output_dir + '/epoch_' + str(epoch_id + 1)
sys.stderr.write('epoch%d is finished and takes %f s\n' % ( sys.stderr.write('epoch%d is finished and takes %f s\n' % (
...@@ -188,4 +189,5 @@ def train(): ...@@ -188,4 +189,5 @@ def train():
if __name__ == "__main__": if __name__ == "__main__":
utils.check_version()
train() train()
...@@ -11,6 +11,7 @@ import paddle.fluid as fluid ...@@ -11,6 +11,7 @@ import paddle.fluid as fluid
from args import parse_args from args import parse_args
from criteo_reader import CriteoDataset from criteo_reader import CriteoDataset
from network_conf import ctr_deepfm_model from network_conf import ctr_deepfm_model
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid') logger = logging.getLogger('fluid')
...@@ -71,4 +72,5 @@ def infer(): ...@@ -71,4 +72,5 @@ def infer():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
infer() infer()
...@@ -6,6 +6,7 @@ from network_conf import ctr_deepfm_model ...@@ -6,6 +6,7 @@ from network_conf import ctr_deepfm_model
import time import time
import numpy import numpy
import pickle import pickle
import utils
def train(): def train():
...@@ -59,4 +60,5 @@ def train(): ...@@ -59,4 +60,5 @@ def train():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
train() train()
...@@ -11,12 +11,12 @@ def ctr_deepfm_model(embedding_size, ...@@ -11,12 +11,12 @@ def ctr_deepfm_model(embedding_size,
is_sparse=False): is_sparse=False):
init_value_ = 0.1 init_value_ = 0.1
raw_feat_idx = fluid.layers.data( raw_feat_idx = fluid.data(
name='feat_idx', shape=[num_field], dtype='int64') name='feat_idx', shape=[None, num_field], dtype='int64')
raw_feat_value = fluid.layers.data( raw_feat_value = fluid.data(
name='feat_value', shape=[num_field], dtype='float32') name='feat_value', shape=[None, num_field], dtype='float32')
label = fluid.layers.data( label = fluid.data(
name='label', shape=[1], dtype='float32') # None * 1 name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx, feat_idx = fluid.layers.reshape(raw_feat_idx,
[-1, 1]) # (None * num_field) * 1 [-1, 1]) # (None * num_field) * 1
...@@ -25,7 +25,7 @@ def ctr_deepfm_model(embedding_size, ...@@ -25,7 +25,7 @@ def ctr_deepfm_model(embedding_size,
# -------------------- first order term -------------------- # -------------------- first order term --------------------
first_weights_re = fluid.layers.embedding( first_weights_re = fluid.embedding(
input=feat_idx, input=feat_idx,
is_sparse=is_sparse, is_sparse=is_sparse,
dtype='float32', dtype='float32',
...@@ -41,7 +41,7 @@ def ctr_deepfm_model(embedding_size, ...@@ -41,7 +41,7 @@ def ctr_deepfm_model(embedding_size,
# -------------------- second order term -------------------- # -------------------- second order term --------------------
feat_embeddings_re = fluid.layers.embedding( feat_embeddings_re = fluid.embedding(
input=feat_idx, input=feat_idx,
is_sparse=is_sparse, is_sparse=is_sparse,
dtype='float32', dtype='float32',
......
import sys
import paddle.fluid as fluid
import logging
# Set up the root logger with the logging module's default configuration so
# that messages emitted from this module are actually shown.
logging.basicConfig()
# Module-level logger used by check_version() to report a failed check.
logger = logging.getLogger(__name__)
# Only check_version is exported by `from utils import *`.
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is too old.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    failure is reported through the module logger and the process is
    terminated with exit status 1. Returns ``None`` when the check passes.

    Raises:
        SystemExit: when the version check fails.
    """
    # Parenthesised implicit concatenation: the previous form ended the last
    # fragment with a stray line-continuation backslash, which glued the
    # assignment onto whatever physical line happened to follow it.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # Deliberately broad: any failure of the version check is treated as
        # "requirement not met" and aborts the script.
        logger.error(err)
        sys.exit(1)
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
``` ```
## 运行环境 ## 运行环境
**要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。**
需要先安装PaddlePaddle Fluid,然后运行: 需要先安装PaddlePaddle Fluid,然后运行:
```shell ```shell
......
...@@ -20,6 +20,7 @@ factorization machines, please refer to the paper [factorization ...@@ -20,6 +20,7 @@ factorization machines, please refer to the paper [factorization
machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
## Environment ## Environment
**Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or a suitable develop version.**
You should install PaddlePaddle Fluid first, and run: You should install PaddlePaddle Fluid first, and run:
```shell ```shell
......
...@@ -10,6 +10,7 @@ import paddle.fluid as fluid ...@@ -10,6 +10,7 @@ import paddle.fluid as fluid
import reader import reader
from network_conf import ctr_dnn_model from network_conf import ctr_dnn_model
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid") logger = logging.getLogger("fluid")
...@@ -91,4 +92,5 @@ def infer(): ...@@ -91,4 +92,5 @@ def infer():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
infer() infer()
...@@ -31,20 +31,22 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, ...@@ -31,20 +31,22 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
""" """
sparse_fm_layer sparse_fm_layer
""" """
first_embeddings = fluid.layers.embedding( first_embeddings = fluid.embedding(
input=input, input=input,
dtype='float32', dtype='float32',
size=[emb_dict_size, 1], size=[emb_dict_size, 1],
is_sparse=True) is_sparse=True)
first_embeddings = fluid.layers.squeeze(input=first_embeddings, axes=[1])
first_order = fluid.layers.sequence_pool( first_order = fluid.layers.sequence_pool(
input=first_embeddings, pool_type='sum') input=first_embeddings, pool_type='sum')
nonzero_embeddings = fluid.layers.embedding( nonzero_embeddings = fluid.embedding(
input=input, input=input,
dtype='float32', dtype='float32',
size=[emb_dict_size, factor_size], size=[emb_dict_size, factor_size],
param_attr=fm_param_attr, param_attr=fm_param_attr,
is_sparse=True) is_sparse=True)
nonzero_embeddings = fluid.layers.squeeze(input=nonzero_embeddings, axes=[1])
summed_features_emb = fluid.layers.sequence_pool( summed_features_emb = fluid.layers.sequence_pool(
input=nonzero_embeddings, pool_type='sum') input=nonzero_embeddings, pool_type='sum')
summed_features_emb_square = fluid.layers.square(summed_features_emb) summed_features_emb_square = fluid.layers.square(summed_features_emb)
...@@ -57,8 +59,8 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, ...@@ -57,8 +59,8 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
summed_features_emb_square - squared_sum_features_emb) summed_features_emb_square - squared_sum_features_emb)
return first_order, second_order return first_order, second_order
dense_input = fluid.layers.data( dense_input = fluid.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32') name="dense_input", shape=[None, dense_feature_dim], dtype='float32')
sparse_input_ids = [ sparse_input_ids = [
fluid.layers.data( fluid.layers.data(
...@@ -66,7 +68,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, ...@@ -66,7 +68,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
for i in range(1, 27) for i in range(1, 27)
] ]
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.data(name='label', shape=[None, 1], dtype='int64')
datas = [dense_input] + sparse_input_ids + [label] datas = [dense_input] + sparse_input_ids + [label]
...@@ -96,6 +98,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, ...@@ -96,6 +98,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
size=[sparse_feature_dim, factor_size], size=[sparse_feature_dim, factor_size],
param_attr=sparse_fm_param_attr, param_attr=sparse_fm_param_attr,
is_sparse=True) is_sparse=True)
emb = fluid.layers.squeeze(input=emb, axes=[1])
return fluid.layers.sequence_pool(input=emb, pool_type='average') return fluid.layers.sequence_pool(input=emb, pool_type='average')
sparse_embed_seq = list(map(embedding_layer, sparse_input_ids)) sparse_embed_seq = list(map(embedding_layer, sparse_input_ids))
...@@ -139,7 +142,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, ...@@ -139,7 +142,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
def embedding_layer(input): def embedding_layer(input):
"""embedding_layer""" """embedding_layer"""
emb = fluid.layers.embedding( emb = fluid.embedding(
input=input, input=input,
is_sparse=True, is_sparse=True,
# you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190 # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
...@@ -149,18 +152,19 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): ...@@ -149,18 +152,19 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="SparseFeatFactors", name="SparseFeatFactors",
initializer=fluid.initializer.Uniform())) initializer=fluid.initializer.Uniform()))
emb = fluid.layers.squeeze(input=emb, axes=[1])
return fluid.layers.sequence_pool(input=emb, pool_type='average') return fluid.layers.sequence_pool(input=emb, pool_type='average')
dense_input = fluid.layers.data( dense_input = fluid.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32') name="dense_input", shape=[None, dense_feature_dim], dtype='float32')
sparse_input_ids = [ sparse_input_ids = [
fluid.layers.data( fluid.data(
name="C" + str(i), shape=[1], lod_level=1, dtype='int64') name="C" + str(i), shape=[None, 1], lod_level=1, dtype='int64')
for i in range(1, 27) for i in range(1, 27)
] ]
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.data(name='label', shape=[None, 1], dtype='int64')
words = [dense_input] + sparse_input_ids + [label] words = [dense_input] + sparse_input_ids + [label]
......
...@@ -13,6 +13,7 @@ import paddle.fluid as fluid ...@@ -13,6 +13,7 @@ import paddle.fluid as fluid
import reader import reader
from network_conf import ctr_dnn_model from network_conf import ctr_dnn_model
from multiprocessing import cpu_count from multiprocessing import cpu_count
import utils
# disable gpu training for this example # disable gpu training for this example
os.environ["CUDA_VISIBLE_DEVICES"] = "" os.environ["CUDA_VISIBLE_DEVICES"] = ""
...@@ -269,4 +270,5 @@ def get_cards(args): ...@@ -269,4 +270,5 @@ def get_cards(args):
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
train() train()
import sys
import paddle.fluid as fluid
import logging
# Set up the root logger with the logging module's default configuration so
# that messages emitted from this module are actually shown.
logging.basicConfig()
# Module-level logger used by check_version() to report a failed check.
logger = logging.getLogger(__name__)
# Only check_version is exported by `from utils import *`.
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is too old.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    failure is reported through the module logger and the process is
    terminated with exit status 1. Returns ``None`` when the check passes.

    Raises:
        SystemExit: when the version check fails.
    """
    # Parenthesised implicit concatenation: the previous form ended the last
    # fragment with a stray line-continuation backslash, which glued the
    # assignment onto whatever physical line happened to follow it.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # Deliberately broad: any failure of the version check is treated as
        # "requirement not met" and aborts the script.
        logger.error(err)
        sys.exit(1)
...@@ -12,7 +12,7 @@ sh download.sh ...@@ -12,7 +12,7 @@ sh download.sh
``` ```
## 环境 ## 环境
- PaddlePaddle 1.6 - **要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。**
## 单机训练 ## 单机训练
```bash ```bash
......
...@@ -5,6 +5,7 @@ import time ...@@ -5,6 +5,7 @@ import time
import network_conf import network_conf
import paddle.fluid as fluid import paddle.fluid as fluid
import utils
def parse_args(): def parse_args():
...@@ -193,4 +194,5 @@ def train(): ...@@ -193,4 +194,5 @@ def train():
if __name__ == "__main__": if __name__ == "__main__":
utils.check_version()
train() train()
...@@ -8,6 +8,7 @@ import paddle.fluid as fluid ...@@ -8,6 +8,7 @@ import paddle.fluid as fluid
from args import parse_args from args import parse_args
from criteo_reader import CriteoDataset from criteo_reader import CriteoDataset
import network_conf import network_conf
import utils
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid') logger = logging.getLogger('fluid')
...@@ -72,4 +73,5 @@ def infer(): ...@@ -72,4 +73,5 @@ def infer():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
infer() infer()
...@@ -56,4 +56,5 @@ def train(): ...@@ -56,4 +56,5 @@ def train():
if __name__ == '__main__': if __name__ == '__main__':
utils.check_version()
train() train()
...@@ -14,18 +14,18 @@ def ctr_xdeepfm_model(embedding_size, ...@@ -14,18 +14,18 @@ def ctr_xdeepfm_model(embedding_size,
initer = fluid.initializer.TruncatedNormalInitializer( initer = fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_) loc=0.0, scale=init_value_)
raw_feat_idx = fluid.layers.data( raw_feat_idx = fluid.data(
name='feat_idx', shape=[num_field], dtype='int64') name='feat_idx', shape=[None, num_field], dtype='int64')
raw_feat_value = fluid.layers.data( raw_feat_value = fluid.data(
name='feat_value', shape=[num_field], dtype='float32') name='feat_value', shape=[None, num_field], dtype='float32')
label = fluid.layers.data( label = fluid.data(
name='label', shape=[1], dtype='float32') # None * 1 name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx, feat_idx = fluid.layers.reshape(raw_feat_idx,
[-1, 1]) # (None * num_field) * 1 [-1, 1]) # (None * num_field) * 1
feat_value = fluid.layers.reshape( feat_value = fluid.layers.reshape(
raw_feat_value, [-1, num_field, 1]) # None * num_field * 1 raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
feat_embeddings = fluid.layers.embedding( feat_embeddings = fluid.embedding(
input=feat_idx, input=feat_idx,
is_sparse=is_sparse, is_sparse=is_sparse,
dtype='float32', dtype='float32',
...@@ -39,7 +39,7 @@ def ctr_xdeepfm_model(embedding_size, ...@@ -39,7 +39,7 @@ def ctr_xdeepfm_model(embedding_size,
# -------------------- linear -------------------- # -------------------- linear --------------------
weights_linear = fluid.layers.embedding( weights_linear = fluid.embedding(
input=feat_idx, input=feat_idx,
is_sparse=is_sparse, is_sparse=is_sparse,
dtype='float32', dtype='float32',
......
import sys
import paddle.fluid as fluid
import logging
# Set up the root logger with the logging module's default configuration so
# that messages emitted from this module are actually shown.
logging.basicConfig()
# Module-level logger used by check_version() to report a failed check.
logger = logging.getLogger(__name__)
# Only check_version is exported by `from utils import *`.
__all__ = ['check_version']
def check_version():
    """
    Log an error and exit when the installed PaddlePaddle version is too old.

    Calls ``fluid.require_version('1.6.0')``. If that call raises, the
    failure is reported through the module logger and the process is
    terminated with exit status 1. Returns ``None`` when the check passes.

    Raises:
        SystemExit: when the version check fails.
    """
    # Parenthesised implicit concatenation: the previous form ended the last
    # fragment with a stray line-continuation backslash, which glued the
    # assignment onto whatever physical line happened to follow it.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # Deliberately broad: any failure of the version check is treated as
        # "requirement not met" and aborts the script.
        logger.error(err)
        sys.exit(1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册