From f39c93e65554cb985c009860aaf2663eb0783aea Mon Sep 17 00:00:00 2001 From: zhoushiyu <31816202+wilhelmzh@users.noreply.github.com> Date: Mon, 14 Oct 2019 20:06:34 +0800 Subject: [PATCH] PaddleRec api update in release 1.6 (#3554) * PaddleRec release 1.6 api update * Update README.md * Update README.md bold remind of paddle version * Update README.md change paddle version note --- PaddleRec/ctr/dcn/README.md | 3 ++- PaddleRec/ctr/dcn/cluster_train.py | 2 ++ PaddleRec/ctr/dcn/infer.py | 2 ++ PaddleRec/ctr/dcn/local_train.py | 2 ++ PaddleRec/ctr/dcn/network.py | 10 +++++----- PaddleRec/ctr/dcn/utils.py | 24 ++++++++++++++++++++++++ PaddleRec/ctr/deepfm/README.md | 4 ++-- PaddleRec/ctr/deepfm/cluster_train.py | 4 +++- PaddleRec/ctr/deepfm/infer.py | 2 ++ PaddleRec/ctr/deepfm/local_train.py | 2 ++ PaddleRec/ctr/deepfm/network_conf.py | 16 ++++++++-------- PaddleRec/ctr/deepfm/utils.py | 24 ++++++++++++++++++++++++ PaddleRec/ctr/dnn/README.cn.md | 1 + PaddleRec/ctr/dnn/README.md | 1 + PaddleRec/ctr/dnn/infer.py | 2 ++ PaddleRec/ctr/dnn/network_conf.py | 26 +++++++++++++++----------- PaddleRec/ctr/dnn/train.py | 2 ++ PaddleRec/ctr/dnn/utils.py | 24 ++++++++++++++++++++++++ PaddleRec/ctr/xdeepfm/README.md | 2 +- PaddleRec/ctr/xdeepfm/cluster_train.py | 2 ++ PaddleRec/ctr/xdeepfm/infer.py | 2 ++ PaddleRec/ctr/xdeepfm/local_train.py | 1 + PaddleRec/ctr/xdeepfm/network_conf.py | 16 ++++++++-------- PaddleRec/ctr/xdeepfm/utils.py | 24 ++++++++++++++++++++++++ 24 files changed, 161 insertions(+), 37 deletions(-) create mode 100644 PaddleRec/ctr/dcn/utils.py create mode 100644 PaddleRec/ctr/deepfm/utils.py create mode 100644 PaddleRec/ctr/dnn/utils.py create mode 100644 PaddleRec/ctr/xdeepfm/utils.py diff --git a/PaddleRec/ctr/dcn/README.md b/PaddleRec/ctr/dcn/README.md index 560acee8..4c59c39a 100644 --- a/PaddleRec/ctr/dcn/README.md +++ b/PaddleRec/ctr/dcn/README.md @@ -10,6 +10,7 @@ ├── network.py # 网络结构 ├── config.py # 参数配置 ├── reader.py # 读取数据相关的函数 +├── utils.py # 通用函数 ├── data/ ├── download.sh # 下载数据脚本 ├── preprocess.py # 数据预处理脚本 @@ -23,7 +24,7 @@ DCN模型介绍可以参阅论文[Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123) ## 环境 -- PaddlePaddle 1.6 +- **目前模型库下模型均要求使用PaddlePaddle 1.6及以上版本或适当的develop版本** ## 数据下载 diff --git a/PaddleRec/ctr/dcn/cluster_train.py b/PaddleRec/ctr/dcn/cluster_train.py index 1b136ed9..aa862ea3 100644 --- a/PaddleRec/ctr/dcn/cluster_train.py +++ b/PaddleRec/ctr/dcn/cluster_train.py @@ -7,6 +7,7 @@ from collections import OrderedDict import paddle.fluid as fluid from network import DCN +import utils def parse_args(): @@ -194,4 +195,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/dcn/infer.py b/PaddleRec/ctr/dcn/infer.py index 7d6fea62..25e1337d 100644 --- a/PaddleRec/ctr/dcn/infer.py +++ b/PaddleRec/ctr/dcn/infer.py @@ -16,6 +16,7 @@ from config import parse_args from reader import CriteoDataset from network import DCN from collections import OrderedDict +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -94,4 +95,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/dcn/local_train.py b/PaddleRec/ctr/dcn/local_train.py index 48ff7689..bb8c4240 100644 --- a/PaddleRec/ctr/dcn/local_train.py +++ b/PaddleRec/ctr/dcn/local_train.py @@ -11,6 +11,7 @@ import paddle.fluid as fluid from config import parse_args from network import DCN +import utils """ train DCN model """ @@ -86,4 +87,5 @@ def train(args): if __name__ == '__main__': args = parse_args() print(args) + utils.check_version() train(args) diff --git a/PaddleRec/ctr/dcn/network.py b/PaddleRec/ctr/dcn/network.py index 0589e0a1..f0637227 100644 --- a/PaddleRec/ctr/dcn/network.py +++ b/PaddleRec/ctr/dcn/network.py @@ -40,13 +40,13 @@ class DCN(object): def build_network(self, is_test=False): # data input - self.target_input = fluid.layers.data( - name='label', shape=[1], dtype='float32') + self.target_input = fluid.data( + name='label', shape=[None, 1], dtype='float32') data_dict = OrderedDict() for feat_name in self.feat_dims_dict: - data_dict[feat_name] = fluid.layers.data( - name=feat_name, shape=[1], dtype='float32') + data_dict[feat_name] = fluid.data( + name=feat_name, shape=[None, 1], dtype='float32') self.net_input = self._create_embedding_input(data_dict) @@ -120,7 +120,7 @@ class DCN(object): def _create_embedding_input(self, data_dict): # sparse embedding - sparse_emb_dict = OrderedDict((name, fluid.layers.embedding( + sparse_emb_dict = OrderedDict((name, fluid.embedding( input=fluid.layers.cast( data_dict[name], dtype='int64'), size=[ diff --git a/PaddleRec/ctr/dcn/utils.py b/PaddleRec/ctr/dcn/utils.py new file mode 100644 index 00000000..779b129e --- /dev/null +++ b/PaddleRec/ctr/dcn/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/deepfm/README.md b/PaddleRec/ctr/deepfm/README.md index a9847c80..9ff01aa1 100644 --- a/PaddleRec/ctr/deepfm/README.md +++ b/PaddleRec/ctr/deepfm/README.md @@ -15,7 +15,7 @@ This model implementation reproduces the result of the paper "DeepFM: A Factoriz ``` ## Environment -- PaddlePaddle 1.6 +- **Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or suitable develop version.** ## Download and preprocess data @@ -80,7 +80,7 @@ other params explained in cluster_train.py Infer ```bash -python infer.py --model_output_dir cluster_model --test_epoch 10 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2' +python infer.py --model_output_dir cluster_model --test_epoch 10 --num_feat 141443 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2' ``` Notes: diff --git a/PaddleRec/ctr/deepfm/cluster_train.py b/PaddleRec/ctr/deepfm/cluster_train.py index 23985ebe..5f03fee9 100644 --- a/PaddleRec/ctr/deepfm/cluster_train.py +++ b/PaddleRec/ctr/deepfm/cluster_train.py @@ -5,6 +5,7 @@ import time from network_conf import ctr_deepfm_model import paddle.fluid as fluid +import utils def parse_args(): @@ -153,7 +154,7 @@ def train(): dataset=dataset, fetch_list=[loss], fetch_info=['epoch %d batch loss' % (epoch_id + 1)], - print_period=20, + print_period=5, debug=False) model_dir = args.model_output_dir + '/epoch_' + str(epoch_id + 1) sys.stderr.write('epoch%d is finished and takes %f s\n' % ( @@ -188,4 +189,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/deepfm/infer.py b/PaddleRec/ctr/deepfm/infer.py index c5ceb564..527d389c 100644 --- a/PaddleRec/ctr/deepfm/infer.py +++ b/PaddleRec/ctr/deepfm/infer.py @@ -11,6 +11,7 @@ import paddle.fluid as fluid from args import parse_args from criteo_reader import CriteoDataset from network_conf import ctr_deepfm_model +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -71,4 +72,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/deepfm/local_train.py b/PaddleRec/ctr/deepfm/local_train.py index b6edf974..d81ad518 100644 --- a/PaddleRec/ctr/deepfm/local_train.py +++ b/PaddleRec/ctr/deepfm/local_train.py @@ -6,6 +6,7 @@ from network_conf import ctr_deepfm_model import time import numpy import pickle +import utils def train(): @@ -59,4 +60,5 @@ def train(): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/deepfm/network_conf.py b/PaddleRec/ctr/deepfm/network_conf.py index 480a0c75..ad41bdf9 100644 --- a/PaddleRec/ctr/deepfm/network_conf.py +++ b/PaddleRec/ctr/deepfm/network_conf.py @@ -11,12 +11,12 @@ def ctr_deepfm_model(embedding_size, is_sparse=False): init_value_ = 0.1 - raw_feat_idx = fluid.layers.data( - name='feat_idx', shape=[num_field], dtype='int64') - raw_feat_value = fluid.layers.data( - name='feat_value', shape=[num_field], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='float32') # None * 1 + raw_feat_idx = fluid.data( + name='feat_idx', shape=[None, num_field], dtype='int64') + raw_feat_value = fluid.data( + name='feat_value', shape=[None, num_field], dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='float32') # None * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1 @@ -25,7 +25,7 @@ def ctr_deepfm_model(embedding_size, # -------------------- first order term -------------------- - first_weights_re = fluid.layers.embedding( + first_weights_re = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', @@ -41,7 +41,7 @@ def ctr_deepfm_model(embedding_size, # -------------------- second order term -------------------- - feat_embeddings_re = fluid.layers.embedding( + feat_embeddings_re = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', diff --git a/PaddleRec/ctr/deepfm/utils.py b/PaddleRec/ctr/deepfm/utils.py new file mode 100644 index 00000000..779b129e --- /dev/null +++ b/PaddleRec/ctr/deepfm/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/dnn/README.cn.md b/PaddleRec/ctr/dnn/README.cn.md index 47d3b220..54a7d55b 100644 --- a/PaddleRec/ctr/dnn/README.cn.md +++ b/PaddleRec/ctr/dnn/README.cn.md @@ -15,6 +15,7 @@ ``` ## 运行环境 +**要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。** 需要先安装PaddlePaddle Fluid,然后运行: ```shell diff --git a/PaddleRec/ctr/dnn/README.md b/PaddleRec/ctr/dnn/README.md index 022fa47c..9587a2a8 100644 --- a/PaddleRec/ctr/dnn/README.md +++ b/PaddleRec/ctr/dnn/README.md @@ -20,6 +20,7 @@ factorization machines, please refer to the paper [factorization machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) ## Environment +**Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or suitable develop version.** You should install PaddlePaddle Fluid first, and run: ```shell diff --git a/PaddleRec/ctr/dnn/infer.py b/PaddleRec/ctr/dnn/infer.py index 2f622629..680e253e 100644 --- a/PaddleRec/ctr/dnn/infer.py +++ b/PaddleRec/ctr/dnn/infer.py @@ -10,6 +10,7 @@ import paddle.fluid as fluid import reader from network_conf import ctr_dnn_model +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger("fluid") @@ -91,4 +92,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/dnn/network_conf.py b/PaddleRec/ctr/dnn/network_conf.py index bb23d484..d53a2c23 100644 --- a/PaddleRec/ctr/dnn/network_conf.py +++ b/PaddleRec/ctr/dnn/network_conf.py @@ -31,20 +31,22 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, """ sparse_fm_layer """ - first_embeddings = fluid.layers.embedding( + first_embeddings = fluid.embedding( input=input, dtype='float32', size=[emb_dict_size, 1], is_sparse=True) + first_embeddings = fluid.layers.squeeze(input=first_embeddings, axes=[1]) first_order = fluid.layers.sequence_pool( input=first_embeddings, pool_type='sum') - nonzero_embeddings = fluid.layers.embedding( + nonzero_embeddings = fluid.embedding( input=input, dtype='float32', size=[emb_dict_size, factor_size], param_attr=fm_param_attr, is_sparse=True) + nonzero_embeddings = fluid.layers.squeeze(input=nonzero_embeddings, axes=[1]) summed_features_emb = fluid.layers.sequence_pool( input=nonzero_embeddings, pool_type='sum') summed_features_emb_square = fluid.layers.square(summed_features_emb) @@ -57,8 +59,8 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, summed_features_emb_square - squared_sum_features_emb) return first_order, second_order - dense_input = fluid.layers.data( - name="dense_input", shape=[dense_feature_dim], dtype='float32') + dense_input = fluid.data( + name="dense_input", shape=[None, dense_feature_dim], dtype='float32') sparse_input_ids = [ fluid.layers.data( @@ -66,7 +68,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, for i in range(1, 27) ] - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') datas = [dense_input] + sparse_input_ids + [label] @@ -96,6 +98,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, size=[sparse_feature_dim, factor_size], param_attr=sparse_fm_param_attr, is_sparse=True) + emb = fluid.layers.squeeze(input=emb, axes=[1]) return fluid.layers.sequence_pool(input=emb, pool_type='average') sparse_embed_seq = list(map(embedding_layer, sparse_input_ids)) @@ -139,7 +142,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): def embedding_layer(input): """embedding_layer""" - emb = fluid.layers.embedding( + emb = fluid.embedding( input=input, is_sparse=True, # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190 @@ -149,18 +152,19 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): param_attr=fluid.ParamAttr( name="SparseFeatFactors", initializer=fluid.initializer.Uniform())) + emb = fluid.layers.squeeze(input=emb, axes=[1]) return fluid.layers.sequence_pool(input=emb, pool_type='average') - dense_input = fluid.layers.data( - name="dense_input", shape=[dense_feature_dim], dtype='float32') + dense_input = fluid.data( + name="dense_input", shape=[None, dense_feature_dim], dtype='float32') sparse_input_ids = [ - fluid.layers.data( - name="C" + str(i), shape=[1], lod_level=1, dtype='int64') + fluid.data( + name="C" + str(i), shape=[None, 1], lod_level=1, dtype='int64') for i in range(1, 27) ] - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') words = [dense_input] + sparse_input_ids + [label] diff --git a/PaddleRec/ctr/dnn/train.py b/PaddleRec/ctr/dnn/train.py index 69e51b9d..f63edeeb 100644 --- a/PaddleRec/ctr/dnn/train.py +++ b/PaddleRec/ctr/dnn/train.py @@ -13,6 +13,7 @@ import paddle.fluid as fluid import reader from network_conf import ctr_dnn_model from multiprocessing import cpu_count +import utils # disable gpu training for this example os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -269,4 +270,5 @@ def get_cards(args): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/dnn/utils.py b/PaddleRec/ctr/dnn/utils.py new file mode 100644 index 00000000..779b129e --- /dev/null +++ b/PaddleRec/ctr/dnn/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/xdeepfm/README.md b/PaddleRec/ctr/xdeepfm/README.md index 9b2475cd..cf759ec0 100644 --- a/PaddleRec/ctr/xdeepfm/README.md +++ b/PaddleRec/ctr/xdeepfm/README.md @@ -12,7 +12,7 @@ sh download.sh ``` ## 环境 -- PaddlePaddle 1.6 +- **要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。** ## 单机训练 ```bash diff --git a/PaddleRec/ctr/xdeepfm/cluster_train.py b/PaddleRec/ctr/xdeepfm/cluster_train.py index 97135b89..0c2b4ea7 100644 --- a/PaddleRec/ctr/xdeepfm/cluster_train.py +++ b/PaddleRec/ctr/xdeepfm/cluster_train.py @@ -5,6 +5,7 @@ import time import network_conf import paddle.fluid as fluid +import utils def parse_args(): @@ -193,4 +194,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/xdeepfm/infer.py b/PaddleRec/ctr/xdeepfm/infer.py index fe2fc8d3..dbac3579 100644 --- a/PaddleRec/ctr/xdeepfm/infer.py +++ b/PaddleRec/ctr/xdeepfm/infer.py @@ -8,6 +8,7 @@ import paddle.fluid as fluid from args import parse_args from criteo_reader import CriteoDataset import network_conf +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -72,4 +73,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/xdeepfm/local_train.py b/PaddleRec/ctr/xdeepfm/local_train.py index 8c548d49..d53dc882 100644 --- a/PaddleRec/ctr/xdeepfm/local_train.py +++ b/PaddleRec/ctr/xdeepfm/local_train.py @@ -56,4 +56,5 @@ def train(): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/xdeepfm/network_conf.py b/PaddleRec/ctr/xdeepfm/network_conf.py index 1cdc5c74..8a38f5d6 100644 --- a/PaddleRec/ctr/xdeepfm/network_conf.py +++ b/PaddleRec/ctr/xdeepfm/network_conf.py @@ -14,18 +14,18 @@ def ctr_xdeepfm_model(embedding_size, initer = fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=init_value_) - raw_feat_idx = fluid.layers.data( - name='feat_idx', shape=[num_field], dtype='int64') - raw_feat_value = fluid.layers.data( - name='feat_value', shape=[num_field], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='float32') # None * 1 + raw_feat_idx = fluid.data( + name='feat_idx', shape=[None, num_field], dtype='int64') + raw_feat_value = fluid.data( + name='feat_value', shape=[None, num_field], dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='float32') # None * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1 feat_value = fluid.layers.reshape( raw_feat_value, [-1, num_field, 1]) # None * num_field * 1 - feat_embeddings = fluid.layers.embedding( + feat_embeddings = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', @@ -39,7 +39,7 @@ def ctr_xdeepfm_model(embedding_size, # -------------------- linear -------------------- - weights_linear = fluid.layers.embedding( + weights_linear = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', diff --git a/PaddleRec/ctr/xdeepfm/utils.py b/PaddleRec/ctr/xdeepfm/utils.py new file mode 100644 index 00000000..779b129e --- /dev/null +++ b/PaddleRec/ctr/xdeepfm/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) -- GitLab