add movie_recommand_demo

2eba5d90 · malin10 · dfb74b32 · 2eba5d90 · 2eba5d90 · 2eba5d90
19 changed file
--- a/core/trainers/single_infer.py
+++ b/core/trainers/single_infer.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 import time
 import logging
 import os
+import json
+import numpy as np
 import paddle.fluid as fluid
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
@@ -263,8 +265,10 @@ class SingleInfer(TranspileTrainer):
            envs.get_global_env("runner." + self._runner_name +
                                ".print_interval", 20))
        metrics_format.append("{}: {{}}".format("batch"))
+        metrics_indexes = dict()
        for name, var in metrics.items():
            metrics_varnames.append(var.name)
+            metrics_indexes[var.name] = len(metrics_varnames) - 1
            metrics_format.append("{}: {{}}".format(name))
        metrics_format = ", ".join(metrics_format)
@@ -272,19 +276,30 @@ class SingleInfer(TranspileTrainer):
        reader.start()
        batch_id = 0
        scope = self._model[model_name][2]
+        infer_results = []
        with fluid.scope_guard(scope):
            try:
                while True:
                    metrics_rets = self._exe.run(program=program,
-                                                 fetch_list=metrics_varnames)
+                                                 fetch_list=metrics_varnames,
+                                                 return_numpy=False)
                    metrics = [batch_id]
                    metrics.extend(metrics_rets)
+                    batch_infer_result = {}
+                    for k, v in metrics_indexes.items():
+                        batch_infer_result[k] = np.array(metrics_rets[
+                            v]).tolist()
+                    infer_results.append(batch_infer_result)
                    if batch_id % fetch_period == 0 and batch_id != 0:
                        print(metrics_format.format(*metrics))
                    batch_id += 1
            except fluid.core.EOFException:
                reader.reset()
+        with open(model_dict['save_path'], 'w') as fout:
+            json.dump(infer_results, fout)
    def terminal(self, context):
        context['is_exit'] = True

--- a/demo/__init__.py
+++ b/demo/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/demo/movie_recommand/__init__.py
+++ b/demo/movie_recommand/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/demo/movie_recommand/data/online_user/users.dat
+++ b/demo/movie_recommand/data/online_user/users.dat
+2181::M::25::0
+2073::F::18::4
--- a/demo/movie_recommand/data/process_ml_1m.py
+++ b/demo/movie_recommand/data/process_ml_1m.py
+#coding=utf8
+import sys
+reload(sys)
+sys.setdefaultencoding('utf-8')
+import random
+import json
+user_fea = ["userid", "gender", "age", "occupation"]
+movie_fea = ["movieid", "title", "genres"]
+rating_fea = ["userid", "movieid", "rating", "time"]
+dict_size = 60000000
+hash_dict = dict()
+data_path = "ml-1m"
+test_user_path = "online_user"
+def process(path):
+    user_dict = parse_data(data_path + "/users.dat", user_fea)
+    movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
+    for line in open(path):
+        line = line.strip()
+        arr = line.split("::")
+        userid = arr[0]
+        movieid = arr[1]
+        out_str = "time:%s\t%s\t%s\tlabel:%s" % (arr[3], user_dict[userid],
+                                                 movie_dict[movieid], arr[2])
+        log_id = hash(out_str) % 1000000000
+        print "%s\t%s" % (log_id, out_str)
+def parse_data(file_name, feas):
+    dict = {}
+    for line in open(file_name):
+        line = line.strip()
+        arr = line.split("::")
+        out_str = ""
+        for i in range(0, len(feas)):
+            out_str += "%s:%s\t" % (feas[i], arr[i])
+        dict[arr[0]] = out_str.strip()
+    return dict
+def parse_movie_data(file_name, feas):
+    dict = {}
+    for line in open(file_name):
+        line = line.strip()
+        arr = line.split("::")
+        title_str = ""
+        genres_str = ""
+        for term in arr[1].split(" "):
+            term = term.strip()
+            if term != "":
+                title_str += "%s " % (term)
+        for term in arr[2].split("|"):
+            term = term.strip()
+            if term != "":
+                genres_str += "%s " % (term)
+        out_str = "movieid:%s\ttitle:%s\tgenres:%s" % (
+            arr[0], title_str.strip(), genres_str.strip())
+        dict[arr[0]] = out_str.strip()
+    return dict
+def to_hash(in_str):
+    feas = in_str.split(":")[0]
+    arr = in_str.split(":")[1]
+    out_str = "%s:%s" % (feas, (arr + arr[::-1] + arr[::-2] + arr[::-3]))
+    hash_id = hash(out_str) % dict_size
+    if hash_id in hash_dict and hash_dict[hash_id] != out_str:
+        print(hash_id, out_str, hash(out_str))
+        print("conflict")
+        exit(-1)
+    return "%s:%s" % (feas, hash_id)
+def to_hash_list(in_str):
+    arr = in_str.split(":")
+    tmp_arr = arr[1].split(" ")
+    out_str = ""
+    for item in tmp_arr:
+        item = item.strip()
+        if item != "":
+            key = "%s:%s" % (arr[0], item)
+            out_str += "%s " % (to_hash(key))
+    return out_str.strip()
+def get_hash(path):
+    #0-34831 1-time:974673057 2-userid:2021 3-gender:M 4-age:25 5-occupation:0 6-movieid:1345  7-title:Carrie (1976)  8-genres:Horror  9-label:2
+    for line in open(path):
+        arr = line.strip().split("\t")
+        out_str = "logid:%s %s %s %s %s %s %s %s %s %s" % \
+                 (arr[0], arr[1], to_hash(arr[2]), to_hash(arr[3]), to_hash(arr[4]), to_hash(arr[5]), \
+                 to_hash(arr[6]), to_hash_list(arr[7]), to_hash_list(arr[8]), arr[9])
+        print out_str
+def generate_online_user():
+    movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
+    with open(test_user_path + "/movies.dat", 'w') as f:
+        for line in open(test_user_path + "/users.dat"):
+            line = line.strip()
+            arr = line.split("::")
+            userid = arr[0]
+            for item in movie_dict:
+                f.write(userid + "::" + item + "::1")
+                f.write("\n")
+def generate_online_data(path):
+    user_dict = parse_data(data_path + "/users.dat", user_fea)
+    movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
+    for line in open(path):
+        line = line.strip()
+        arr = line.split("::")
+        userid = arr[0]
+        movieid = arr[1]
+        label = arr[2]
+        out_str = "time:%s\t%s\t%s\tlabel:%s" % ("1", user_dict[userid],
+                                                 movie_dict[movieid], label)
+        log_id = hash(out_str) % 1000000000
+        res = "%s\t%s" % (log_id, out_str)
+        arr = res.strip().split("\t")
+        out_str = "logid:%s %s %s %s %s %s %s %s %s %s" % \
+                (arr[0], arr[1], to_hash(arr[2]), to_hash(arr[3]), to_hash(arr[4]), to_hash(arr[5]), \
+                to_hash(arr[6]), to_hash_list(arr[7]), to_hash_list(arr[8]), arr[9])
+        print(out_str)
+if __name__ == "__main__":
+    random.seed(1111111)
+    if sys.argv[1] == "process_raw":
+        process(sys.argv[2])
+    elif sys.argv[1] == "hash":
+        get_hash(sys.argv[2])
+    elif sys.argv[1] == "data_recall":
+        generate_online_user()
+        generate_online_data(test_user_path + "/movies.dat")
+    elif sys.argv[1] == "data_rank":
+        generate_online_data(test_user_path + "/movies.dat")
--- a/demo/movie_recommand/data/split.py
+++ b/demo/movie_recommand/data/split.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import random
+train = dict()
+test = dict()
+data_path = "ml-1m"
+for line in open(data_path + "/ratings.dat"):
+    fea = line.rstrip().split("::")
+    if fea[0] not in train:
+        train[fea[0]] = [line]
+    elif fea[0] not in test:
+        test[fea[0]] = dict()
+        test[fea[0]]['time'] = int(fea[3])
+        test[fea[0]]['content'] = line
+    else:
+        time = int(fea[3])
+        if time <= test[fea[0]]['time']:
+            train[fea[0]].append(line)
+        else:
+            train[fea[0]].append(test[fea[0]]['content'])
+            test[fea[0]]['time'] = time
+            test[fea[0]]['content'] = line
+train_data = []
+for key in train:
+    for line in train[key]:
+        train_data.append(line)
+random.shuffle(train_data)
+with open(data_path + "/train.dat", 'w') as f:
+    for line in train_data:
+        f.write(line)
+with open(data_path + "/test.dat", 'w') as f:
+    for key in test:
+        f.write(test[key]['content'])
--- a/demo/movie_recommand/data/test/log.data.hash
+++ b/demo/movie_recommand/data/test/log.data.hash
--- a/demo/movie_recommand/data/train/log.data.hash
+++ b/demo/movie_recommand/data/train/log.data.hash
--- a/demo/movie_recommand/data_prepare.sh
+++ b/demo/movie_recommand/data_prepare.sh
+cd data
+wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
+unzip ml-1m.zip
+python split.py
+mkdir train/
+mkdir test/
+python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
+python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
+python process_ml_1m.py hash log.data.train > ./train/log.data.hash
+python process_ml_1m.py hash log.data.test > ./test/log.data.hash
+rm log.data.train
+rm log.data.test
+cd ../
--- a/demo/movie_recommand/offline_test.sh
+++ b/demo/movie_recommand/offline_test.sh
+## modify config.yaml to infer mode at first
+cd recall
+python -m paddlerec.run -m ./config.yaml
+cd ../rank
+python -m paddlerec.run -m ./config.yaml
+cd ..
+echo "recall offline test result:"
+python parse.py recall_offline recall/infer_result
+echo "rank offline test result:"
+python parse.py recall_offline rank/infer_result
--- a/demo/movie_recommand/online_rank.sh
+++ b/demo/movie_recommand/online_rank.sh
+cd data
+python process_ml_1m.py data_rank > online_user/test/data.txt
+## modify recall/config.yaml to online_infer mode
+cd ../rank
+python -m paddlerec.run -m ./config.yaml
+cd ../
+python parse.py rank_online rank/infer_result
--- a/demo/movie_recommand/online_recall.sh
+++ b/demo/movie_recommand/online_recall.sh
+cd data
+mkdir online_user/test
+python process_ml_1m.py data_recall > online_user/test/data.txt
+## modify recall/config.yaml to online_infer mode
+cd ../recall
+python -m paddlerec.run -m ./config.yaml
+cd ../
+python parse.py recall_online recall/infer_result
--- a/demo/movie_recommand/rank/__init__.py
+++ b/demo/movie_recommand/rank/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/demo/movie_recommand/rank/config.yaml
+++ b/demo/movie_recommand/rank/config.yaml
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+workspace: "demo/movie_recommand"
+# list of dataset
+dataset:
+- name: dataset_train # name of dataset to distinguish different datasets
+  batch_size: 128
+  type: QueueDataset 
+  data_path: "{workspace}/data/train"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+- name: dataset_infer # name
+  batch_size: 128
+  type: DataLoader
+  data_path: "{workspace}/data/test"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+- name: dataset_online_infer # name
+  batch_size: 10
+  type: DataLoader
+  data_path: "{workspace}/data/online_user/test"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+# hyper parameters of user-defined network
+hyper_parameters:
+  # optimizer config
+  optimizer:
+    class: Adam
+    learning_rate: 0.001
+    strategy: async
+  # user-defined <key, value> pairs
+  sparse_feature_number: 60000000
+  sparse_feature_dim: 9
+  dense_input_dim: 13
+  fc_sizes: [512, 256, 128, 32]
+# train
+mode: runner_train
+## online or offline infer
+#mode: runner_infer
+runner:
+- name: runner_train
+  class: single_train
+  save_checkpoint_interval: 1 # save model interval of epochs
+  save_inference_interval: 1 # save inference
+  save_checkpoint_path: "increment" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  epochs: 10
+  device: cpu
+- name: runner_infer
+  epochs: 1
+  class: single_infer
+  print_interval: 10000
+  init_model_path: "increment/9" # load model path
+#train
+phase:
+- name: phase1
+  model: "{workspace}/model.py" # user-defined model
+  dataset_name: dataset_train # select dataset by name
+  thread_num: 12
+##offline infer
+#phase:
+#- name: phase1
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_infer # select dataset by name
+#  save_path: "./infer_result"
+#  thread_num: 1
+##offline infer
+#phase:
+#- name: phase1
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_online_infer # select dataset by name
+#  save_path: "./infer_result"
+#  thread_num: 1
--- a/demo/movie_recommand/rank/model.py
+++ b/demo/movie_recommand/rank/model.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import paddle.fluid as fluid
+from paddlerec.core.utils import envs
+from paddlerec.core.model import Model as ModelBase
+class Model(ModelBase):
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+    def _init_hyper_parameters(self):
+        self.is_distributed = True if envs.get_trainer(
+        ) == "CtrTrainer" else False
+        self.sparse_feature_number = envs.get_global_env(
+            "hyper_parameters.sparse_feature_number")
+        self.sparse_feature_dim = envs.get_global_env(
+            "hyper_parameters.sparse_feature_dim")
+        self.learning_rate = envs.get_global_env(
+            "hyper_parameters.optimizer.learning_rate")
+        self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
+    def net(self, input, is_infer=False):
+        self.user_sparse_inputs = self._sparse_data_var[2:6]
+        self.mov_sparse_inputs = self._sparse_data_var[6:9]
+        self.label_input = self._sparse_data_var[-1]
+        def fc(input):
+            fcs = [input]
+            for size in self.hidden_layers:
+                output = fluid.layers.fc(
+                    input=fcs[-1],
+                    size=size,
+                    act='relu',
+                    param_attr=fluid.ParamAttr(
+                        initializer=fluid.initializer.Normal(
+                            scale=1.0 / math.sqrt(fcs[-1].shape[1]))))
+                fcs.append(output)
+            return fcs[-1]
+        def embedding_layer(input):
+            emb = fluid.layers.embedding(
+                input=input,
+                is_sparse=True,
+                is_distributed=self.is_distributed,
+                size=[self.sparse_feature_number, self.sparse_feature_dim],
+                param_attr=fluid.ParamAttr(
+                    name="emb", initializer=fluid.initializer.Uniform()), )
+            emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+            return emb_sum
+        user_sparse_embed_seq = list(
+            map(embedding_layer, self.user_sparse_inputs))
+        mov_sparse_embed_seq = list(
+            map(embedding_layer, self.mov_sparse_inputs))
+        concated_user = fluid.layers.concat(user_sparse_embed_seq, axis=1)
+        concated_mov = fluid.layers.concat(mov_sparse_embed_seq, axis=1)
+        usr_combined_features = fc(concated_user)
+        mov_combined_features = fc(concated_mov)
+        fc_input = fluid.layers.concat(
+            [usr_combined_features, mov_combined_features], axis=1)
+        sim = fluid.layers.fc(
+            input=fc_input,
+            size=1,
+            act='sigmoid',
+            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                scale=1.0 / math.sqrt(fc_input.shape[1]))))
+        predict = fluid.layers.scale(sim, scale=5)
+        self.predict = predict
+        #auc, batch_auc, _ = fluid.layers.auc(input=self.predict,
+        #                                     label=self.label_input,
+        #                                     num_thresholds=10000,
+        #                                     slide_steps=20)
+        if is_infer:
+            self._infer_results["user_feature"] = usr_combined_features
+            self._infer_results["movie_feature"] = mov_combined_features
+            self._infer_results["uid"] = self._sparse_data_var[2]
+            self._infer_results["movieid"] = self._sparse_data_var[6]
+            self._infer_results["label"] = self._sparse_data_var[-1]
+            self._infer_results["predict"] = self.predict
+            return
+        #self._metrics["AUC"] = auc
+        #self._metrics["BATCH_AUC"] = batch_auc
+        #cost = fluid.layers.cross_entropy(
+        #    input=self.predict, label=self.label_input)
+        cost = fluid.layers.square_error_cost(
+            self.predict,
+            fluid.layers.cast(
+                x=self.label_input, dtype='float32'))
+        avg_cost = fluid.layers.reduce_mean(cost)
+        self._cost = avg_cost
+        self._metrics["LOSS"] = avg_cost
+    def optimizer(self):
+        optimizer = fluid.optimizer.Adam(self.learning_rate, lazy_mode=True)
+        return optimizer
+    def infer_net(self):
+        pass
--- a/demo/movie_recommand/recall/__init__.py
+++ b/demo/movie_recommand/recall/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/demo/movie_recommand/recall/config.yaml
+++ b/demo/movie_recommand/recall/config.yaml
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+workspace: "demo/movie_recommand"
+# list of dataset
+dataset:
+- name: dataset_train # name of dataset to distinguish different datasets
+  batch_size: 128
+  type: QueueDataset 
+  data_path: "{workspace}/data/train"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+- name: dataset_infer # name
+  batch_size: 128
+  type: DataLoader
+  data_path: "{workspace}/data/test"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+- name: dataset_online_infer # name
+  batch_size: 128
+  type: DataLoader
+  data_path: "{workspace}/data/online_user/test"
+  sparse_slots: "logid time userid gender age occupation movieid title genres label"
+  dense_slots: ""
+# hyper parameters of user-defined network
+hyper_parameters:
+  # optimizer config
+  optimizer:
+    class: Adam
+    learning_rate: 0.001
+    strategy: async
+  # user-defined <key, value> pairs
+  sparse_feature_number: 60000000
+  sparse_feature_dim: 9
+  dense_input_dim: 13
+  fc_sizes: [512, 256, 128, 32]
+# train
+mode: runner_train
+## online or offline infer
+#mode: runner_infer
+runner:
+- name: runner_train
+  class: single_train
+  save_checkpoint_interval: 1 # save model interval of epochs
+  save_inference_interval: 1 # save inference
+  save_checkpoint_path: "increment" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  epochs: 10
+  device: cpu
+- name: runner_infer
+  epochs: 1
+  class: single_infer
+  print_interval: 10000
+  init_model_path: "increment/9" # load model path
+#train
+phase:
+- name: phase1
+  model: "{workspace}/model.py" # user-defined model
+  dataset_name: dataset_train # select dataset by name
+  thread_num: 12
+##offline infer
+#phase:
+#- name: phase1
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_infer # select dataset by name
+#  save_path: "./infer_result"
+#  thread_num: 1
+##offline infer
+#phase:
+#- name: phase1
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_online_infer # select dataset by name
+#  save_path: "./infer_result"
+#  thread_num: 1
--- a/demo/movie_recommand/recall/model.py
+++ b/demo/movie_recommand/recall/model.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import paddle.fluid as fluid
+from paddlerec.core.utils import envs
+from paddlerec.core.model import Model as ModelBase
+class Model(ModelBase):
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+    def _init_hyper_parameters(self):
+        self.is_distributed = True if envs.get_trainer(
+        ) == "CtrTrainer" else False
+        self.sparse_feature_number = envs.get_global_env(
+            "hyper_parameters.sparse_feature_number")
+        self.sparse_feature_dim = envs.get_global_env(
+            "hyper_parameters.sparse_feature_dim")
+        self.learning_rate = envs.get_global_env(
+            "hyper_parameters.optimizer.learning_rate")
+        self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
+    def net(self, input, is_infer=False):
+        self.user_sparse_inputs = self._sparse_data_var[2:6]
+        self.mov_sparse_inputs = self._sparse_data_var[6:9]
+        self.label_input = self._sparse_data_var[-1]
+        def fc(input):
+            fcs = [input]
+            for size in self.hidden_layers:
+                output = fluid.layers.fc(
+                    input=fcs[-1],
+                    size=size,
+                    act='relu',
+                    param_attr=fluid.ParamAttr(
+                        initializer=fluid.initializer.Normal(
+                            scale=1.0 / math.sqrt(fcs[-1].shape[1]))))
+                fcs.append(output)
+            return fcs[-1]
+        def embedding_layer(input):
+            emb = fluid.layers.embedding(
+                input=input,
+                is_sparse=True,
+                is_distributed=self.is_distributed,
+                size=[self.sparse_feature_number, self.sparse_feature_dim],
+                param_attr=fluid.ParamAttr(
+                    name="emb", initializer=fluid.initializer.Uniform()), )
+            emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+            return emb_sum
+        user_sparse_embed_seq = list(
+            map(embedding_layer, self.user_sparse_inputs))
+        mov_sparse_embed_seq = list(
+            map(embedding_layer, self.mov_sparse_inputs))
+        concated_user = fluid.layers.concat(user_sparse_embed_seq, axis=1)
+        concated_mov = fluid.layers.concat(mov_sparse_embed_seq, axis=1)
+        usr_combined_features = fc(concated_user)
+        mov_combined_features = fc(concated_mov)
+        sim = fluid.layers.cos_sim(
+            X=usr_combined_features, Y=mov_combined_features)
+        predict = fluid.layers.scale(sim, scale=5)
+        self.predict = predict
+        if is_infer:
+            self._infer_results["uid"] = self._sparse_data_var[2]
+            self._infer_results["movieid"] = self._sparse_data_var[6]
+            self._infer_results["label"] = self._sparse_data_var[-1]
+            self._infer_results["predict"] = self.predict
+            return
+        cost = fluid.layers.square_error_cost(
+            self.predict,
+            fluid.layers.cast(
+                x=self.label_input, dtype='float32'))
+        avg_cost = fluid.layers.reduce_mean(cost)
+        self._cost = avg_cost
+        self._metrics["LOSS"] = avg_cost
+    def optimizer(self):
+        optimizer = fluid.optimizer.Adam(self.learning_rate, lazy_mode=True)
+        return optimizer
--- a/demo/movie_recommand/train.sh
+++ b/demo/movie_recommand/train.sh
+cd recall
+python -m paddlerec.run -m ./config.yaml
+cd ../rank
+python -m paddlerec.run -m ./config.yaml &> train_log &
+cd ..
+echo "recall offline test: "
+python infer_analys