add ncf youtube

a01831d1 · frankwhzhang · bef91cbd · a01831d1 · a01831d1 · a01831d1
18 changed files
--- a/core/utils/envs.py
+++ b/core/utils/envs.py
@@ -90,18 +90,12 @@ def get_global_envs():
 def path_adapter(path):
-    def adapt(l_p):
-        if get_platform() == "WINDOWS":
-            adapted_p = l_p.split("paddlerec.")[1].replace(".", "\\")
-        else:
-            adapted_p = l_p.split("paddlerec.")[1].replace(".", "/")
-        return adapted_p
    if path.startswith("paddlerec."):
        package = get_runtime_environ("PACKAGE_BASE")
-        return os.path.join(package, adapt(path))
+        l_p = path.split("paddlerec.")[1].replace(".", "/")
+        return os.path.join(package, l_p)
    else:
-        return adapt(path)
+        return path 
 def windows_path_converter(path):

--- a/doc/imgs/ncf.png
+++ b/doc/imgs/ncf.png
--- a/doc/imgs/youtube_dnn.png
+++ b/doc/imgs/youtube_dnn.png
--- a/models/rank/din/reader.py
+++ b/models/rank/din/reader.py
@@ -128,4 +128,4 @@ class TrainReader(Reader):
        random.shuffle(data_set)
        return self.batch_reader(data_set, self.batch_size, self.batch_size * 20)
\ No newline at end of file
--- a/models/recall/ncf/__init__.py
+++ b/models/recall/ncf/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/models/recall/ncf/config.yaml
+++ b/models/recall/ncf/config.yaml
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+evaluate:
+  reader:
+    batch_size: 1
+    class: "{workspace}/movielens_infer_reader.py"
+    test_data_path: "{workspace}/data/test"
+train:
+  trainer:
+    # for cluster training
+    strategy: "async"
+  epochs: 3
+  workspace: "paddlerec.models.recall.ncf"
+  device: cpu
+  reader:
+    batch_size: 2
+    class: "{workspace}/movielens_reader.py"
+    train_data_path: "{workspace}/data/train"
+  model:
+    models: "{workspace}/model.py"
+    hyper_parameters:
+      num_users: 6040
+      num_items: 3706
+      latent_dim: 8
+      layers: [64, 32, 16, 8]
+      learning_rate: 0.001
+      optimizer: adam
+  save:
+    increment:
+      dirname: "increment"
+      epoch_interval: 2
+      save_last: True
+    inference:
+      dirname: "inference"
+      epoch_interval: 4
+      save_last: True
--- a/models/recall/ncf/data/test/small_data.txt
+++ b/models/recall/ncf/data/test/small_data.txt
+4764,174,1
+4764,2958,0
+4764,452,0
+4764,1946,0
+4764,3208,0
+2044,2237,1
+2044,1998,0
+2044,328,0
+2044,1542,0
+2044,1932,0
+4276,65,1
+4276,3247,0
+4276,942,0
+4276,3666,0
+4276,2222,0
+3933,682,1
+3933,2451,0
+3933,3695,0
+3933,1643,0
+3933,3568,0
+1151,1265,1
+1151,118,0
+1151,2532,0
+1151,2083,0
+1151,2350,0
+1757,876,1
+1757,201,0
+1757,3633,0
+1757,1068,0
+1757,2549,0
+3370,276,1
+3370,2435,0
+3370,606,0
+3370,910,0
+3370,2146,0
+5137,1018,1
+5137,2163,0
+5137,3167,0
+5137,2315,0
+5137,3595,0
+3933,2831,1
+3933,2881,0
+3933,2949,0
+3933,3660,0
+3933,417,0
+3102,999,1
+3102,1902,0
+3102,2161,0
+3102,3042,0
+3102,1113,0
+2022,336,1
+2022,1672,0
+2022,2656,0
+2022,3649,0
+2022,883,0
+2664,655,1
+2664,3660,0
+2664,1711,0
+2664,3386,0
+2664,1668,0
+25,701,1
+25,32,0
+25,2482,0
+25,3177,0
+25,2767,0
+1738,1643,1
+1738,2187,0
+1738,228,0
+1738,650,0
+1738,3101,0
+5411,1241,1
+5411,2546,0
+5411,3019,0
+5411,3618,0
+5411,1674,0
+638,579,1
+638,3512,0
+638,783,0
+638,2111,0
+638,1880,0
+3554,200,1
+3554,2893,0
+3554,2428,0
+3554,969,0
+3554,2741,0
+4283,1074,1
+4283,3056,0
+4283,2032,0
+4283,405,0
+4283,1505,0
+5111,200,1
+5111,3488,0
+5111,477,0
+5111,2790,0
+5111,40,0
+3964,515,1
+3964,1528,0
+3964,2173,0
+3964,1701,0
+3964,2832,0
--- a/models/recall/ncf/data/train/small_data.txt
+++ b/models/recall/ncf/data/train/small_data.txt
+4764,174,1
+4764,2958,0
+4764,452,0
+4764,1946,0
+4764,3208,0
+2044,2237,1
+2044,1998,0
+2044,328,0
+2044,1542,0
+2044,1932,0
+4276,65,1
+4276,3247,0
+4276,942,0
+4276,3666,0
+4276,2222,0
+3933,682,1
+3933,2451,0
+3933,3695,0
+3933,1643,0
+3933,3568,0
+1151,1265,1
+1151,118,0
+1151,2532,0
+1151,2083,0
+1151,2350,0
+1757,876,1
+1757,201,0
+1757,3633,0
+1757,1068,0
+1757,2549,0
+3370,276,1
+3370,2435,0
+3370,606,0
+3370,910,0
+3370,2146,0
+5137,1018,1
+5137,2163,0
+5137,3167,0
+5137,2315,0
+5137,3595,0
+3933,2831,1
+3933,2881,0
+3933,2949,0
+3933,3660,0
+3933,417,0
+3102,999,1
+3102,1902,0
+3102,2161,0
+3102,3042,0
+3102,1113,0
+2022,336,1
+2022,1672,0
+2022,2656,0
+2022,3649,0
+2022,883,0
+2664,655,1
+2664,3660,0
+2664,1711,0
+2664,3386,0
+2664,1668,0
+25,701,1
+25,32,0
+25,2482,0
+25,3177,0
+25,2767,0
+1738,1643,1
+1738,2187,0
+1738,228,0
+1738,650,0
+1738,3101,0
+5411,1241,1
+5411,2546,0
+5411,3019,0
+5411,3618,0
+5411,1674,0
+638,579,1
+638,3512,0
+638,783,0
+638,2111,0
+638,1880,0
+3554,200,1
+3554,2893,0
+3554,2428,0
+3554,969,0
+3554,2741,0
+4283,1074,1
+4283,3056,0
+4283,2032,0
+4283,405,0
+4283,1505,0
+5111,200,1
+5111,3488,0
+5111,477,0
+5111,2790,0
+5111,40,0
+3964,515,1
+3964,1528,0
+3964,2173,0
+3964,1701,0
+3964,2832,0
--- a/models/recall/ncf/model.py
+++ b/models/recall/ncf/model.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import paddle.fluid as fluid
+from paddlerec.core.utils import envs
+from paddlerec.core.model import Model as ModelBase
+import numpy as np
+class Model(ModelBase):
+    """Neural Collaborative Filtering model.
+
+    Combines an element-wise-product (MF) branch and an MLP branch over
+    user/item embeddings; both are concatenated and fed to a single sigmoid
+    output unit.
+    """
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+    def input_data(self, is_infer=False):
+        """Declare the feed variables and return them as a list.
+
+        Returns [user_input, item_input] when is_infer is True, otherwise
+        [user_input, item_input, label]. Only the training list is stored in
+        self._data_var.
+        """
+        user_input = fluid.data(name="user_input", shape=[-1, 1], dtype="int64", lod_level=0)
+        item_input = fluid.data(name="item_input", shape=[-1, 1], dtype="int64", lod_level=0)
+        # The label variable is declared in both modes but only returned for training.
+        label = fluid.data(name="label", shape=[-1, 1], dtype="int64", lod_level=0)
+        if is_infer:
+            inputs = [user_input] + [item_input]
+        else:
+            inputs = [user_input] + [item_input] + [label]
+            self._data_var = inputs
+        return inputs
+    def net(self, inputs, is_infer=False):
+        """Build the network on top of `inputs`.
+
+        inputs[0] and inputs[1] are the user-id and item-id variables;
+        inputs[2] (the label) is read only when is_infer is False.
+        In inference mode the prediction is stored in
+        self._infer_results["prediction"] and nothing else is built.
+        In training mode the mean log loss is stored in self._cost and
+        self._metrics["cost"].
+        """
+        num_users = envs.get_global_env("hyper_parameters.num_users", None, self._namespace)
+        num_items = envs.get_global_env("hyper_parameters.num_items", None, self._namespace)
+        latent_dim = envs.get_global_env("hyper_parameters.latent_dim", None, self._namespace)
+        layers = envs.get_global_env("hyper_parameters.layers", None, self._namespace)
+        num_layer = len(layers) # entries in `layers`; layers[0] is the width of the concatenated MLP embeddings
+        # Separate embedding tables per branch: width latent_dim for MF,
+        # width layers[0] / 2 for each of the two MLP embeddings.
+        MF_Embedding_User = fluid.embedding(input=inputs[0],
+                                            size=[num_users, latent_dim],
+                                            param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
+                                            is_sparse=True)
+        MF_Embedding_Item = fluid.embedding(input=inputs[1],
+                                        size=[num_items, latent_dim],
+                                        param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
+                                        is_sparse=True)
+        MLP_Embedding_User = fluid.embedding(input=inputs[0],
+                                            size=[num_users, int(layers[0] / 2)],
+                                            param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
+                                            is_sparse=True)
+        MLP_Embedding_Item = fluid.embedding(input=inputs[1],
+                                        size=[num_items, int(layers[0] / 2)],
+                                        param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
+                                        is_sparse=True)
+        # MF part: element-wise product of the flattened user and item embeddings
+        mf_user_latent = fluid.layers.flatten(x=MF_Embedding_User, axis=1)
+        mf_item_latent = fluid.layers.flatten(x=MF_Embedding_Item, axis=1)
+        mf_vector = fluid.layers.elementwise_mul(mf_user_latent, mf_item_latent)
+        # MLP part
+        # The 0-th layer is the concatenation of embedding layers
+        mlp_user_latent = fluid.layers.flatten(x=MLP_Embedding_User, axis=1)
+        mlp_item_latent = fluid.layers.flatten(x=MLP_Embedding_Item, axis=1)
+        mlp_vector = fluid.layers.concat(input=[mlp_user_latent, mlp_item_latent], axis=-1)
+        # layers[1:] are the hidden FC sizes. Each weight uses a truncated-normal
+        # initializer scaled by 1/sqrt(input width) plus L2 decay of 1e-4.
+        for i in range(1, num_layer):
+            mlp_vector = fluid.layers.fc(input=mlp_vector,
+                                       size=layers[i],
+                                       act='relu',
+                                       param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(mlp_vector.shape[1])),
+                                                       regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4)),
+                                       name='layer_' + str(i))
+        # Concatenate MF and MLP parts
+        predict_vector = fluid.layers.concat(input=[mf_vector, mlp_vector], axis=-1)
+        # Final prediction layer
+        prediction = fluid.layers.fc(input=predict_vector,
+                                    size=1,
+                                    act='sigmoid',
+                                    param_attr=fluid.initializer.MSRAInitializer(uniform=True), 
+                                    name='prediction')
+        if is_infer:
+            # Inference stops here: no label is available, so no loss is built.
+            self._infer_results["prediction"] = prediction
+            return
+        # The label is fed as int64 and cast to float32 for log_loss.
+        cost = fluid.layers.log_loss(input=prediction, label=fluid.layers.cast(x=inputs[2], dtype='float32'))
+        avg_cost = fluid.layers.mean(cost)
+        self._cost = avg_cost
+        self._metrics["cost"] = avg_cost
+    def train_net(self):
+        """Declare the training inputs and build the training network."""
+        input_data = self.input_data()
+        self.net(input_data)
+    def infer_net(self):
+        """Declare the inference inputs, attach a DataLoader to them and build the inference network."""
+        self._infer_data_var = self.input_data(is_infer=True)
+        self._infer_data_loader = fluid.io.DataLoader.from_generator(
+                feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
+        self.net(self._infer_data_var, is_infer=True)
--- a/models/recall/ncf/movielens_infer_reader.py
+++ b/models/recall/ncf/movielens_infer_reader.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs
+from collections import defaultdict
+import numpy as np
+class EvaluateReader(Reader):
+    """Inference-time reader for the NCF sample files.
+
+    Each input line is comma-separated with the user id in the first field
+    and the item id in the second; any further fields are ignored.
+    """
+    def init(self):
+        """Nothing to initialise: this reader keeps no state."""
+        pass
+    def generate_sample(self, line):
+        """Return a generator function that parses one text line.
+
+        Args:
+            line: one raw line of the data file, e.g. "4764,174,1".
+
+        Returns:
+            A zero-argument callable. Iterating its result yields a single
+            list of (name, values) pairs for "user_input" and "item_input".
+        """
+        def reader():
+            """Parse `line` and yield its (name, values) pairs."""
+            features = line.strip().split(',')
+            feature_name = ["user_input", "item_input"]
+            values = [[int(features[0])], [int(features[1])]]
+            # Materialize the pairs: on Python 3 a bare zip() is a one-shot
+            # iterator, not a list, so a consumer that checks for list/tuple
+            # or iterates twice would break. list() is a no-op change on
+            # Python 2, where zip() already returns a list.
+            yield list(zip(feature_name, values))
+        return reader
--- a/models/recall/ncf/movielens_reader.py
+++ b/models/recall/ncf/movielens_reader.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs
+from collections import defaultdict
+import numpy as np
+class TrainReader(Reader):
+    """Training reader for the NCF sample files.
+
+    Each input line is comma-separated: user id, item id, label.
+    """
+    def init(self):
+        """Nothing to initialise: this reader keeps no state."""
+        pass
+    def generate_sample(self, line):
+        """Return a generator function that parses one text line.
+
+        Args:
+            line: one raw line of the data file, e.g. "4764,174,1".
+
+        Returns:
+            A zero-argument callable. Iterating its result yields a single
+            list of (name, values) pairs for "user_input", "item_input"
+            and "label".
+        """
+        def reader():
+            """Parse `line` and yield its (name, values) pairs."""
+            features = line.strip().split(',')
+            feature_name = ["user_input", "item_input", "label"]
+            values = [[int(features[0])], [int(features[1])], [int(features[2])]]
+            # Materialize the pairs: on Python 3 a bare zip() is a one-shot
+            # iterator, not a list, so a consumer that checks for list/tuple
+            # or iterates twice would break. list() is a no-op change on
+            # Python 2, where zip() already returns a list.
+            yield list(zip(feature_name, values))
+        return reader
--- a/models/recall/readme.md
+++ b/models/recall/readme.md
 # 召回模型库
 ## 简介
-我们提供了常见的召回任务中使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。实现的召回模型包括 [SR-GNN](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/recall/gnn)、[GRU4REC](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/recall/gru4rec)、[Sequence Semantic Retrieval Model](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/recall/ssr)、[Word2Vector](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/recall/word2vec)。
+我们提供了常见的召回任务中使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。实现的召回模型包括 [SR-GNN](gnn)、[GRU4REC](gru4rec)、[Sequence Semantic Retrieval Model](ssr)、[Word2Vector](word2vec)、[Youtube_DNN](youtube_dnn)、[ncf](ncf)。
 模型算法库在持续添加中，欢迎关注。
@@ -9,7 +9,7 @@
 * [整体介绍](#整体介绍)
    * [召回模型列表](#召回模型列表)
 * [使用教程](#使用教程)
-    * [训练&预测](#训练&预测)
+    * [训练 预测](#训练-预测)
 * [效果对比](#效果对比)
    * [模型效果列表](#模型效果列表)
@@ -20,7 +20,9 @@
 | :------------------: | :--------------------: | :---------: |
 | Word2Vec | word2vector | [Distributed Representations of Words and Phrases and their Compositionality](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)(2013) |
 | GRU4REC | SR-GRU | [Session-based Recommendations with Recurrent Neural Networks](https://arxiv.org/abs/1511.06939)(2015) |
+| Youtube_DNN | Youtube_DNN | [Deep Neural Networks for YouTube Recommendations](https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/45530.pdf)(2016) |
 | SSR | Sequence Semantic Retrieval Model | [Multi-Rate Deep Learning for Temporal Recommendation](http://sonyis.me/paperpdf/spr209-song_sigir16.pdf)(2016) |
+| NCF | Neural Collaborative Filtering | [Neural Collaborative Filtering](https://arxiv.org/pdf/1708.05031.pdf)(2017) |
 | GNN | SR-GNN | [Session-based Recommendation with Graph Neural Networks](https://arxiv.org/abs/1811.00855)(2018) |
 下面是每个模型的简介（注：图片引用自链接中的论文）
@@ -35,31 +37,45 @@
 <img align="center" src="../../doc/imgs/gru4rec.png">
 <p>
+[Youtube_DNN](https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/45530.pdf):
+<p align="center">
+<img align="center" src="../../doc/imgs/youtube_dnn.png">
+</p>
 [SSR](http://sonyis.me/paperpdf/spr209-song_sigir16.pdf):
 <p align="center">
 <img align="center" src="../../doc/imgs/ssr.png">
 <p>
+[NCF](https://arxiv.org/pdf/1708.05031.pdf):
+<p align="center">
+<img align="center" src="../../doc/imgs/ncf.png">
+</p>
 [GNN](https://arxiv.org/abs/1811.00855):
 <p align="center">
 <img align="center" src="../../doc/imgs/gnn.png">
 <p>
 ## 使用教程
-### 训练&预测
+### 训练 预测
 ```shell
 python -m paddlerec.run -m paddlerec.models.recall.word2vec # word2vec
 python -m paddlerec.run -m paddlerec.models.recall.ssr # ssr
 python -m paddlerec.run -m paddlerec.models.recall.gru4rec # gru4rec
 python -m paddlerec.run -m paddlerec.models.recall.gnn # gnn
+python -m paddlerec.run -m paddlerec.models.recall.ncf # ncf
+python -m paddlerec.run -m paddlerec.models.recall.youtube_dnn # youtube_dnn
 ```
 ## 效果对比
 ### 模型效果列表
-|       数据集        |       模型       |       loss        |       Recall@20       | 
+|       数据集        |       模型       |       HR@10        |       Recall@20       | 
 | :------------------: | :--------------------: | :---------: |:---------: |
 |       DIGINETICA     |       GNN       |       --        |       0.507       |
 |       RSC15        |       GRU4REC       |       --        |       0.670          |
 |       RSC15        |       SSR       |       --        |       0.590          |
+|       MOVIELENS        |       NCF       |       0.688        |       --          |
+|       --        |       Youtube_DNN       |       --        |       --          |
 |       1 Billion Word Language Model Benchmark        |       Word2Vec       |       --         |       0.54          |
--- a/models/recall/youtube_dnn/__init__.py
+++ b/models/recall/youtube_dnn/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/models/recall/youtube_dnn/config.yaml
+++ b/models/recall/youtube_dnn/config.yaml
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+train:
+  trainer:
+    # for cluster training
+    strategy: "async"
+  epochs: 3
+  workspace: "paddlerec.models.recall.youtube_dnn"
+  device: cpu
+  reader:
+    batch_size: 2
+    class: "{workspace}/random_reader.py"
+    train_data_path: "{workspace}/data/train"
+  model:
+    models: "{workspace}/model.py"
+    hyper_parameters:
+      watch_vec_size: 64
+      search_vec_size: 64
+      other_feat_size: 64
+      output_size: 100
+      layers: [128, 64, 32]
+      learning_rate: 0.01
+      optimizer: sgd
+  save:
+    increment:
+      dirname: "increment"
+      epoch_interval: 2
+      save_last: True
+    inference:
+      dirname: "inference"
+      epoch_interval: 4
+      save_last: True
--- a/models/recall/youtube_dnn/data/test/small_data.txt
+++ b/models/recall/youtube_dnn/data/test/small_data.txt
+4764,174,1
+4764,2958,0
+4764,452,0
+4764,1946,0
+4764,3208,0
+2044,2237,1
+2044,1998,0
+2044,328,0
+2044,1542,0
+2044,1932,0
+4276,65,1
+4276,3247,0
+4276,942,0
+4276,3666,0
+4276,2222,0
+3933,682,1
+3933,2451,0
+3933,3695,0
+3933,1643,0
+3933,3568,0
+1151,1265,1
+1151,118,0
+1151,2532,0
+1151,2083,0
+1151,2350,0
+1757,876,1
+1757,201,0
+1757,3633,0
+1757,1068,0
+1757,2549,0
+3370,276,1
+3370,2435,0
+3370,606,0
+3370,910,0
+3370,2146,0
+5137,1018,1
+5137,2163,0
+5137,3167,0
+5137,2315,0
+5137,3595,0
+3933,2831,1
+3933,2881,0
+3933,2949,0
+3933,3660,0
+3933,417,0
+3102,999,1
+3102,1902,0
+3102,2161,0
+3102,3042,0
+3102,1113,0
+2022,336,1
+2022,1672,0
+2022,2656,0
+2022,3649,0
+2022,883,0
+2664,655,1
+2664,3660,0
+2664,1711,0
+2664,3386,0
+2664,1668,0
+25,701,1
+25,32,0
+25,2482,0
+25,3177,0
+25,2767,0
+1738,1643,1
+1738,2187,0
+1738,228,0
+1738,650,0
+1738,3101,0
+5411,1241,1
+5411,2546,0
+5411,3019,0
+5411,3618,0
+5411,1674,0
+638,579,1
+638,3512,0
+638,783,0
+638,2111,0
+638,1880,0
+3554,200,1
+3554,2893,0
+3554,2428,0
+3554,969,0
+3554,2741,0
+4283,1074,1
+4283,3056,0
+4283,2032,0
+4283,405,0
+4283,1505,0
+5111,200,1
+5111,3488,0
+5111,477,0
+5111,2790,0
+5111,40,0
+3964,515,1
+3964,1528,0
+3964,2173,0
+3964,1701,0
+3964,2832,0
--- a/models/recall/youtube_dnn/data/train/samll_data.txt
+++ b/models/recall/youtube_dnn/data/train/samll_data.txt
+4764,174,1
+4764,2958,0
+4764,452,0
+4764,1946,0
+4764,3208,0
+2044,2237,1
+2044,1998,0
+2044,328,0
+2044,1542,0
+2044,1932,0
+4276,65,1
+4276,3247,0
+4276,942,0
+4276,3666,0
+4276,2222,0
+3933,682,1
+3933,2451,0
+3933,3695,0
+3933,1643,0
+3933,3568,0
+1151,1265,1
+1151,118,0
+1151,2532,0
+1151,2083,0
+1151,2350,0
+1757,876,1
+1757,201,0
+1757,3633,0
+1757,1068,0
+1757,2549,0
+3370,276,1
+3370,2435,0
+3370,606,0
+3370,910,0
+3370,2146,0
+5137,1018,1
+5137,2163,0
+5137,3167,0
+5137,2315,0
+5137,3595,0
+3933,2831,1
+3933,2881,0
+3933,2949,0
+3933,3660,0
+3933,417,0
+3102,999,1
+3102,1902,0
+3102,2161,0
+3102,3042,0
+3102,1113,0
+2022,336,1
+2022,1672,0
+2022,2656,0
+2022,3649,0
+2022,883,0
+2664,655,1
+2664,3660,0
+2664,1711,0
+2664,3386,0
+2664,1668,0
+25,701,1
+25,32,0
+25,2482,0
+25,3177,0
+25,2767,0
+1738,1643,1
+1738,2187,0
+1738,228,0
+1738,650,0
+1738,3101,0
+5411,1241,1
+5411,2546,0
+5411,3019,0
+5411,3618,0
+5411,1674,0
+638,579,1
+638,3512,0
+638,783,0
+638,2111,0
+638,1880,0
+3554,200,1
+3554,2893,0
+3554,2428,0
+3554,969,0
+3554,2741,0
+4283,1074,1
+4283,3056,0
+4283,2032,0
+4283,405,0
+4283,1505,0
+5111,200,1
+5111,3488,0
+5111,477,0
+5111,2790,0
+5111,40,0
+3964,515,1
+3964,1528,0
+3964,2173,0
+3964,1701,0
+3964,2832,0
--- a/models/recall/youtube_dnn/model.py
+++ b/models/recall/youtube_dnn/model.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import paddle.fluid as fluid
+from paddlerec.core.utils import envs
+from paddlerec.core.model import Model as ModelBase
+import numpy as np
+class Model(ModelBase):
+    """YouTube-DNN style recall model.
+
+    Three dense feature vectors are concatenated, passed through three ReLU
+    layers and classified by a softmax layer with output_size classes.
+    """
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+    def input_data(self, is_infer=False):
+        """Declare the feed variables, store them in self._data_var and return them.
+
+        The returned list is [watch_vec, search_vec, other_feat, label].
+        The is_infer argument is accepted but not used here.
+        """
+        watch_vec_size = envs.get_global_env("hyper_parameters.watch_vec_size", None, self._namespace)
+        search_vec_size = envs.get_global_env("hyper_parameters.search_vec_size", None, self._namespace)
+        other_feat_size = envs.get_global_env("hyper_parameters.other_feat_size", None, self._namespace)
+        watch_vec = fluid.data(name="watch_vec", shape=[None, watch_vec_size], dtype="float32")
+        search_vec = fluid.data(name="search_vec", shape=[None, search_vec_size], dtype="float32")
+        other_feat = fluid.data(name="other_feat", shape=[None, other_feat_size], dtype="float32")
+        label = fluid.data(name="label", shape=[None, 1], dtype="int64")
+        inputs = [watch_vec] + [search_vec] + [other_feat] + [label]
+        self._data_var = inputs
+        return inputs
+    def fc(self, tag, data, out_dim, active='relu'):
+        """Add one fully connected layer named `tag` on top of `data`.
+
+        Args:
+            tag: layer name; also the prefix of the bias (and, for 'l4', weight) parameter names.
+            data: input variable; data.shape[1] is used to scale the 'l4' weight initializer.
+            out_dim: output width of the layer.
+            active: activation name passed to fluid.layers.fc.
+
+        Returns:
+            The output variable of the layer.
+        """
+        init_stddev = 1.0
+        scales = 1.0  / np.sqrt(data.shape[1])
+        # Only the layer tagged 'l4' gets an explicitly named weight with a
+        # normal initializer of std init_stddev / sqrt(input width); every other
+        # layer passes param_attr=None (presumably the framework default).
+        if tag == 'l4':
+            p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag,
+                        initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=init_stddev * scales))
+        else:
+            p_attr = None
+        # Every layer's bias is named '<tag>_bias' and starts at 0.1.
+        b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
+        out = fluid.layers.fc(input=data,
+                            size=out_dim,
+                            act=active,
+                            param_attr=p_attr, 
+                            bias_attr =b_attr,
+                            name=tag)
+        return out
+    def net(self, inputs):
+        """Build the training network.
+
+        All entries of `inputs` except the last are concatenated as features;
+        the last entry is the label. The mean cross-entropy is stored in
+        self._cost and the accuracy in self._metrics["acc"].
+        """
+        output_size = envs.get_global_env("hyper_parameters.output_size", None, self._namespace)
+        layers = envs.get_global_env("hyper_parameters.layers", None, self._namespace)
+        concat_feats = fluid.layers.concat(input=inputs[:-1], axis=-1)
+        # Exactly three hidden layers are built: only layers[0], layers[1] and
+        # layers[2] are read, regardless of how many entries the config lists.
+        l1 = self.fc('l1', concat_feats, layers[0], 'relu')
+        l2 = self.fc('l2', l1, layers[1], 'relu')
+        l3 = self.fc('l3', l2, layers[2], 'relu')
+        l4 = self.fc('l4', l3, output_size, 'softmax')
+        # Tensor handed to accuracy() as its `total` argument; not used afterwards in this method.
+        num_seqs = fluid.layers.create_tensor(dtype='int64')
+        acc = fluid.layers.accuracy(input=l4, label=inputs[-1], total=num_seqs)
+        cost = fluid.layers.cross_entropy(input=l4, label=inputs[-1])
+        avg_cost = fluid.layers.mean(cost)
+        self._cost = avg_cost
+        self._metrics["acc"] = acc
+    def train_net(self):
+        """Declare the inputs and build the training network."""
+        input_data = self.input_data()
+        self.net(input_data)
+    def infer_net(self):
+        """Inference is not implemented for this model; this method does nothing."""
+        pass
--- a/models/recall/youtube_dnn/random_reader.py
+++ b/models/recall/youtube_dnn/random_reader.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs
+from collections import defaultdict
+import numpy as np
+class TrainReader(Reader):
+    """Training reader that emits random samples.
+
+    One sample is produced per input line, but the content of the line is
+    never read.
+    """
+    def init(self):
+        """Cache the feature widths and the label range from the "train.model" hyper-parameters."""
+        self.watch_vec_size = envs.get_global_env("hyper_parameters.watch_vec_size", None, "train.model")
+        self.search_vec_size = envs.get_global_env("hyper_parameters.search_vec_size", None, "train.model")
+        self.other_feat_size = envs.get_global_env("hyper_parameters.other_feat_size", None, "train.model")
+        self.output_size = envs.get_global_env("hyper_parameters.output_size", None, "train.model")
+    def generate_sample(self, line):
+        """Return a generator function that produces one random sample.
+
+        Args:
+            line: one raw line of the data file; ignored.
+
+        Returns:
+            A zero-argument callable. Iterating its result yields a single
+            list of (name, values) pairs for "watch_vec", "search_vec",
+            "other_feat" and "label".
+        """
+        def reader():
+            """Draw the three random feature vectors and a random label, in that order."""
+            feature_name = ["watch_vec", "search_vec", "other_feat", "label"]
+            values = [
+                np.random.rand(self.watch_vec_size).tolist(),
+                np.random.rand(self.search_vec_size).tolist(),
+                np.random.rand(self.other_feat_size).tolist(),
+                # label is an integer in [0, output_size)
+                [np.random.randint(self.output_size)],
+            ]
+            # Materialize the pairs: on Python 3 a bare zip() is a one-shot
+            # iterator, not a list, so a consumer that checks for list/tuple
+            # or iterates twice would break. list() is a no-op change on
+            # Python 2, where zip() already returns a list.
+            yield list(zip(feature_name, values))
+        return reader