diff --git a/PaddleRec/ctr/dcn/README.md b/PaddleRec/ctr/dcn/README.md index 560acee8ab535d05831275e036d1fcfe541bd555..4c59c39a9a1b17d163597ce6727742ac2b751c2a 100644 --- a/PaddleRec/ctr/dcn/README.md +++ b/PaddleRec/ctr/dcn/README.md @@ -10,6 +10,7 @@ ├── network.py # 网络结构 ├── config.py # 参数配置 ├── reader.py # 读取数据相关的函数 +├── utils.py # 通用函数 ├── data/ ├── download.sh # 下载数据脚本 ├── preprocess.py # 数据预处理脚本 @@ -23,7 +24,7 @@ DCN模型介绍可以参阅论文[Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123) ## 环境 -- PaddlePaddle 1.6 +- **目前模型库下模型均要求使用PaddlePaddle 1.6及以上版本或适当的develop版本** ## 数据下载 diff --git a/PaddleRec/ctr/dcn/cluster_train.py b/PaddleRec/ctr/dcn/cluster_train.py index 1b136ed935b2b9896d26949a78e0390fb0233755..aa862ea3a57c7e31f0122e195ad732f4d5fef302 100644 --- a/PaddleRec/ctr/dcn/cluster_train.py +++ b/PaddleRec/ctr/dcn/cluster_train.py @@ -7,6 +7,7 @@ from collections import OrderedDict import paddle.fluid as fluid from network import DCN +import utils def parse_args(): @@ -194,4 +195,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/dcn/infer.py b/PaddleRec/ctr/dcn/infer.py index 7d6fea628bf47af5599c818e4b4948d440f17846..25e1337db6d67cdbbae750f5e57623b145f8ab97 100644 --- a/PaddleRec/ctr/dcn/infer.py +++ b/PaddleRec/ctr/dcn/infer.py @@ -16,6 +16,7 @@ from config import parse_args from reader import CriteoDataset from network import DCN from collections import OrderedDict +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -94,4 +95,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/dcn/local_train.py b/PaddleRec/ctr/dcn/local_train.py index 48ff7689468bf4de016fcaf83a29dd07ea2d6a70..bb8c42405281768f910dd618e1d235ec80c9cb93 100644 --- a/PaddleRec/ctr/dcn/local_train.py +++ b/PaddleRec/ctr/dcn/local_train.py @@ -11,6 +11,7 @@ import paddle.fluid as fluid from config import parse_args from network import DCN +import utils """ train DCN model """ @@ -86,4 +87,5 @@ def train(args): if __name__ == '__main__': args = parse_args() print(args) + utils.check_version() train(args) diff --git a/PaddleRec/ctr/dcn/network.py b/PaddleRec/ctr/dcn/network.py index 0589e0a1cb4208717683174fb008a69019d897fd..f0637227b86efd02b4f5e348bd129723b8ab8936 100644 --- a/PaddleRec/ctr/dcn/network.py +++ b/PaddleRec/ctr/dcn/network.py @@ -40,13 +40,13 @@ class DCN(object): def build_network(self, is_test=False): # data input - self.target_input = fluid.layers.data( - name='label', shape=[1], dtype='float32') + self.target_input = fluid.data( + name='label', shape=[None, 1], dtype='float32') data_dict = OrderedDict() for feat_name in self.feat_dims_dict: - data_dict[feat_name] = fluid.layers.data( - name=feat_name, shape=[1], dtype='float32') + data_dict[feat_name] = fluid.data( + name=feat_name, shape=[None, 1], dtype='float32') self.net_input = self._create_embedding_input(data_dict) @@ -120,7 +120,7 @@ class DCN(object): def _create_embedding_input(self, data_dict): # sparse embedding - sparse_emb_dict = OrderedDict((name, fluid.layers.embedding( + sparse_emb_dict = OrderedDict((name, fluid.embedding( input=fluid.layers.cast( data_dict[name], dtype='int64'), size=[ diff --git a/PaddleRec/ctr/dcn/utils.py b/PaddleRec/ctr/dcn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..779b129e574e4611f29202f71d73856ecb888069 --- /dev/null +++ b/PaddleRec/ctr/dcn/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/deepfm/README.md b/PaddleRec/ctr/deepfm/README.md index a9847c8073913cd102d3b18a19ea52b53a1922af..9ff01aa190114cc6e2b8cc95c9e2bcf01e82a38d 100644 --- a/PaddleRec/ctr/deepfm/README.md +++ b/PaddleRec/ctr/deepfm/README.md @@ -15,7 +15,7 @@ This model implementation reproduces the result of the paper "DeepFM: A Factoriz ``` ## Environment -- PaddlePaddle 1.6 +- **Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or suitable develop version.** ## Download and preprocess data @@ -80,7 +80,7 @@ other params explained in cluster_train.py Infer ```bash -python infer.py --model_output_dir cluster_model --test_epoch 10 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2' +python infer.py --model_output_dir cluster_model --test_epoch 10 --num_feat 141443 --test_data_dir=dist_data/dist_test_data --feat_dict='dist_data/aid_data/feat_dict_10.pkl2' ``` Notes: diff --git a/PaddleRec/ctr/deepfm/cluster_train.py b/PaddleRec/ctr/deepfm/cluster_train.py index 23985ebefab5d2ae0f1e92a32bb86dc0fd63c14e..5f03fee9154efbfcf0ba38640951a569c7aa8548 100644 --- a/PaddleRec/ctr/deepfm/cluster_train.py +++ b/PaddleRec/ctr/deepfm/cluster_train.py @@ -5,6 +5,7 @@ import time from network_conf import ctr_deepfm_model import paddle.fluid as fluid +import utils def parse_args(): @@ -153,7 +154,7 @@ def train(): dataset=dataset, fetch_list=[loss], fetch_info=['epoch %d batch loss' % (epoch_id + 1)], - print_period=20, + print_period=5, debug=False) model_dir = args.model_output_dir + '/epoch_' + str(epoch_id + 1) sys.stderr.write('epoch%d is finished and takes %f s\n' % ( @@ -188,4 +189,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/deepfm/infer.py b/PaddleRec/ctr/deepfm/infer.py index c5ceb564ddc482626887ee9bc12f252b5ff7e6fa..527d389cb5bf2aa19f712e88ba94fa77b1f488b3 100644 --- a/PaddleRec/ctr/deepfm/infer.py +++ b/PaddleRec/ctr/deepfm/infer.py @@ -11,6 +11,7 @@ import paddle.fluid as fluid from args import parse_args from criteo_reader import CriteoDataset from network_conf import ctr_deepfm_model +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -71,4 +72,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/deepfm/local_train.py b/PaddleRec/ctr/deepfm/local_train.py index b6edf9742297a822f300461e075aef55282ca4a9..d81ad518c65a27d454d3dde826b4306706bfb7b5 100644 --- a/PaddleRec/ctr/deepfm/local_train.py +++ b/PaddleRec/ctr/deepfm/local_train.py @@ -6,6 +6,7 @@ from network_conf import ctr_deepfm_model import time import numpy import pickle +import utils def train(): @@ -59,4 +60,5 @@ def train(): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/deepfm/network_conf.py b/PaddleRec/ctr/deepfm/network_conf.py index 480a0c753c069a36058ce3b9efab671ccb77ab9b..ad41bdf97472079a248cd19bd3282f1889e314be 100644 --- a/PaddleRec/ctr/deepfm/network_conf.py +++ b/PaddleRec/ctr/deepfm/network_conf.py @@ -11,12 +11,12 @@ def ctr_deepfm_model(embedding_size, is_sparse=False): init_value_ = 0.1 - raw_feat_idx = fluid.layers.data( - name='feat_idx', shape=[num_field], dtype='int64') - raw_feat_value = fluid.layers.data( - name='feat_value', shape=[num_field], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='float32') # None * 1 + raw_feat_idx = fluid.data( + name='feat_idx', shape=[None, num_field], dtype='int64') + raw_feat_value = fluid.data( + name='feat_value', shape=[None, num_field], dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='float32') # None * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1 @@ -25,7 +25,7 @@ def ctr_deepfm_model(embedding_size, # -------------------- first order term -------------------- - first_weights_re = fluid.layers.embedding( + first_weights_re = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', @@ -41,7 +41,7 @@ def ctr_deepfm_model(embedding_size, # -------------------- second order term -------------------- - feat_embeddings_re = fluid.layers.embedding( + feat_embeddings_re = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', diff --git a/PaddleRec/ctr/deepfm/utils.py b/PaddleRec/ctr/deepfm/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..779b129e574e4611f29202f71d73856ecb888069 --- /dev/null +++ b/PaddleRec/ctr/deepfm/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/dnn/README.cn.md b/PaddleRec/ctr/dnn/README.cn.md index 47d3b22099886b40499be3168f4f2759c9e3facd..54a7d55b8d9ae5183645fd8c0ff87afd5c22258b 100644 --- a/PaddleRec/ctr/dnn/README.cn.md +++ b/PaddleRec/ctr/dnn/README.cn.md @@ -15,6 +15,7 @@ ``` ## 运行环境 +**要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。** 需要先安装PaddlePaddle Fluid,然后运行: ```shell diff --git a/PaddleRec/ctr/dnn/README.md b/PaddleRec/ctr/dnn/README.md index 022fa47c9248269f184b316ba7642324cbdc8717..9587a2a8d9f76f90a9b9e58a9ce5bbe1c7baa138 100644 --- a/PaddleRec/ctr/dnn/README.md +++ b/PaddleRec/ctr/dnn/README.md @@ -20,6 +20,7 @@ factorization machines, please refer to the paper [factorization machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) ## Environment +**Now all models in PaddleRec require PaddlePaddle version 1.6 or higher, or suitable develop version.** You should install PaddlePaddle Fluid first, and run: ```shell diff --git a/PaddleRec/ctr/dnn/infer.py b/PaddleRec/ctr/dnn/infer.py index 2f622629ec487b076f382f141061804a760c71ac..680e253e5ba380868f988ab111415b5d0cbca6b2 100644 --- a/PaddleRec/ctr/dnn/infer.py +++ b/PaddleRec/ctr/dnn/infer.py @@ -10,6 +10,7 @@ import paddle.fluid as fluid import reader from network_conf import ctr_dnn_model +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger("fluid") @@ -91,4 +92,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/dnn/network_conf.py b/PaddleRec/ctr/dnn/network_conf.py index bb23d4844a8785ddb4dde49e4b5d83b1359b9e07..d53a2c23c803d777b45ba3a1efa75cbcea16508c 100644 --- a/PaddleRec/ctr/dnn/network_conf.py +++ b/PaddleRec/ctr/dnn/network_conf.py @@ -31,20 +31,22 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, """ sparse_fm_layer """ - first_embeddings = fluid.layers.embedding( + first_embeddings = fluid.embedding( input=input, dtype='float32', size=[emb_dict_size, 1], is_sparse=True) + first_embeddings = fluid.layers.squeeze(input=first_embeddings, axes=[1]) first_order = fluid.layers.sequence_pool( input=first_embeddings, pool_type='sum') - nonzero_embeddings = fluid.layers.embedding( + nonzero_embeddings = fluid.embedding( input=input, dtype='float32', size=[emb_dict_size, factor_size], param_attr=fm_param_attr, is_sparse=True) + nonzero_embeddings = fluid.layers.squeeze(input=nonzero_embeddings, axes=[1]) summed_features_emb = fluid.layers.sequence_pool( input=nonzero_embeddings, pool_type='sum') summed_features_emb_square = fluid.layers.square(summed_features_emb) @@ -57,8 +59,8 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, summed_features_emb_square - squared_sum_features_emb) return first_order, second_order - dense_input = fluid.layers.data( - name="dense_input", shape=[dense_feature_dim], dtype='float32') + dense_input = fluid.data( + name="dense_input", shape=[None, dense_feature_dim], dtype='float32') sparse_input_ids = [ fluid.layers.data( @@ -66,7 +68,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, for i in range(1, 27) ] - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') datas = [dense_input] + sparse_input_ids + [label] @@ -96,6 +98,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, size=[sparse_feature_dim, factor_size], param_attr=sparse_fm_param_attr, is_sparse=True) + emb = fluid.layers.squeeze(input=emb, axes=[1]) return fluid.layers.sequence_pool(input=emb, pool_type='average') sparse_embed_seq = list(map(embedding_layer, sparse_input_ids)) @@ -139,7 +142,7 @@ def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): def embedding_layer(input): """embedding_layer""" - emb = fluid.layers.embedding( + emb = fluid.embedding( input=input, is_sparse=True, # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190 @@ -149,18 +152,19 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): param_attr=fluid.ParamAttr( name="SparseFeatFactors", initializer=fluid.initializer.Uniform())) + emb = fluid.layers.squeeze(input=emb, axes=[1]) return fluid.layers.sequence_pool(input=emb, pool_type='average') - dense_input = fluid.layers.data( - name="dense_input", shape=[dense_feature_dim], dtype='float32') + dense_input = fluid.data( + name="dense_input", shape=[None, dense_feature_dim], dtype='float32') sparse_input_ids = [ - fluid.layers.data( - name="C" + str(i), shape=[1], lod_level=1, dtype='int64') + fluid.data( + name="C" + str(i), shape=[None, 1], lod_level=1, dtype='int64') for i in range(1, 27) ] - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') words = [dense_input] + sparse_input_ids + [label] diff --git a/PaddleRec/ctr/dnn/train.py b/PaddleRec/ctr/dnn/train.py index 69e51b9db9bec7d38658e442354363b0e72301c4..f63edeeb35416611ce59e4361a2cac5d70a65eea 100644 --- a/PaddleRec/ctr/dnn/train.py +++ b/PaddleRec/ctr/dnn/train.py @@ -13,6 +13,7 @@ import paddle.fluid as fluid import reader from network_conf import ctr_dnn_model from multiprocessing import cpu_count +import utils # disable gpu training for this example os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -269,4 +270,5 @@ def get_cards(args): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/dnn/utils.py b/PaddleRec/ctr/dnn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..779b129e574e4611f29202f71d73856ecb888069 --- /dev/null +++ b/PaddleRec/ctr/dnn/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/PaddleRec/ctr/xdeepfm/README.md b/PaddleRec/ctr/xdeepfm/README.md index 9b2475cd789db6e298db31194713e27064849d8b..cf759ec0190c6136aa41009864361ddd2c23ed49 100644 --- a/PaddleRec/ctr/xdeepfm/README.md +++ b/PaddleRec/ctr/xdeepfm/README.md @@ -12,7 +12,7 @@ sh download.sh ``` ## 环境 -- PaddlePaddle 1.6 +- **要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。** ## 单机训练 ```bash diff --git a/PaddleRec/ctr/xdeepfm/cluster_train.py b/PaddleRec/ctr/xdeepfm/cluster_train.py index 97135b89b71209833304cd968176c0725b1ce49c..0c2b4ea7e9cb34383095bdf0feab37481d5ca2e3 100644 --- a/PaddleRec/ctr/xdeepfm/cluster_train.py +++ b/PaddleRec/ctr/xdeepfm/cluster_train.py @@ -5,6 +5,7 @@ import time import network_conf import paddle.fluid as fluid +import utils def parse_args(): @@ -193,4 +194,5 @@ def train(): if __name__ == "__main__": + utils.check_version() train() diff --git a/PaddleRec/ctr/xdeepfm/infer.py b/PaddleRec/ctr/xdeepfm/infer.py index fe2fc8d326ae7cb3e489c3bd765cc17903ac5321..dbac3579188d165be002827541cf16112e2b66d2 100644 --- a/PaddleRec/ctr/xdeepfm/infer.py +++ b/PaddleRec/ctr/xdeepfm/infer.py @@ -8,6 +8,7 @@ import paddle.fluid as fluid from args import parse_args from criteo_reader import CriteoDataset import network_conf +import utils logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger('fluid') @@ -72,4 +73,5 @@ def infer(): if __name__ == '__main__': + utils.check_version() infer() diff --git a/PaddleRec/ctr/xdeepfm/local_train.py b/PaddleRec/ctr/xdeepfm/local_train.py index 8c548d49ca34a45718b1b872412a9b3781fccad7..d53dc882c65b8e508dff68512c01c3ee5bfdc5d6 100644 --- a/PaddleRec/ctr/xdeepfm/local_train.py +++ b/PaddleRec/ctr/xdeepfm/local_train.py @@ -56,4 +56,5 @@ def train(): if __name__ == '__main__': + utils.check_version() train() diff --git a/PaddleRec/ctr/xdeepfm/network_conf.py b/PaddleRec/ctr/xdeepfm/network_conf.py index 1cdc5c74990ca9a41865c31fdf1dd00822d4764c..8a38f5d6106bd0ae3fc5753702b329f264b99f55 100644 --- a/PaddleRec/ctr/xdeepfm/network_conf.py +++ b/PaddleRec/ctr/xdeepfm/network_conf.py @@ -14,18 +14,18 @@ def ctr_xdeepfm_model(embedding_size, initer = fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=init_value_) - raw_feat_idx = fluid.layers.data( - name='feat_idx', shape=[num_field], dtype='int64') - raw_feat_value = fluid.layers.data( - name='feat_value', shape=[num_field], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='float32') # None * 1 + raw_feat_idx = fluid.data( + name='feat_idx', shape=[None, num_field], dtype='int64') + raw_feat_value = fluid.data( + name='feat_value', shape=[None, num_field], dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='float32') # None * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1 feat_value = fluid.layers.reshape( raw_feat_value, [-1, num_field, 1]) # None * num_field * 1 - feat_embeddings = fluid.layers.embedding( + feat_embeddings = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', @@ -39,7 +39,7 @@ def ctr_xdeepfm_model(embedding_size, # -------------------- linear -------------------- - weights_linear = fluid.layers.embedding( + weights_linear = fluid.embedding( input=feat_idx, is_sparse=is_sparse, dtype='float32', diff --git a/PaddleRec/ctr/xdeepfm/utils.py b/PaddleRec/ctr/xdeepfm/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..779b129e574e4611f29202f71d73856ecb888069 --- /dev/null +++ b/PaddleRec/ctr/xdeepfm/utils.py @@ -0,0 +1,24 @@ +import sys +import paddle.fluid as fluid +import logging + +logging.basicConfig() +logger = logging.getLogger(__name__) + +__all__ = ['check_version'] + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1)