Merge branch 'simnet' into 'develop'

Add simnet. See merge request !15

Merge branch 'simnet' into 'develop'
Add simnet. See merge request !15
decaa00f · malin10 · 922dbcf3 · c30368ea · decaa00f · decaa00f
10 changed files
--- a/fleet_rec/core/trainers/single_trainer.py
+++ b/fleet_rec/core/trainers/single_trainer.py
@@ -93,7 +93,7 @@ class SingleTrainer(TranspileTrainer):
                    metrics = [epoch, batch_id]
                    metrics.extend(metrics_rets)

-                    if batch_id % 10 == 0 and batch_id != 0:
+                    if batch_id % self.fetch_period == 0 and batch_id != 0:
                        print(metrics_format.format(*metrics))
                    batch_id += 1
            except fluid.core.EOFException:

--- a/models/recall/multiview-simnet/__init__.py
+++ b/models/recall/multiview-simnet/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/models/recall/multiview-simnet/config.yaml
+++ b/models/recall/multiview-simnet/config.yaml
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+evaluate:
+  workspace: "fleetrec.models.recall.multiview-simnet"
+  reader:
+    batch_size: 2
+    class: "{workspace}/evaluate_reader.py"
+    test_data_path: "{workspace}/data/test"
+
+train:
+  trainer:
+    # for cluster training
+    strategy: "async"
+
+  epochs: 2
+  workspace: "fleetrec.models.recall.multiview-simnet"
+
+  reader:
+    batch_size: 2
+    class: "{workspace}/reader.py"
+    train_data_path: "{workspace}/data/train"
+    dataset_class: "DataLoader"
+
+  model:
+    models: "{workspace}/model.py"
+    hyper_parameters:
+      use_DataLoader: True
+      query_encoder: "bow"
+      title_encoder: "bow"
+      query_encode_dim: 128
+      title_encode_dim: 128
+      query_slots: 1
+      title_slots: 1
+      sparse_feature_dim: 1000001
+      embedding_dim: 128
+      hidden_size: 128
+      learning_rate: 0.0001
+      optimizer: adam
+
+  save:
+    increment:
+      dirname: "increment"
+      epoch_interval: 1
+      save_last: True
+    inference:
+      dirname: "inference"
+      epoch_interval: 1
+      save_last: True
--- a/models/recall/multiview-simnet/data/test/test.txt
+++ b/models/recall/multiview-simnet/data/test/test.txt
+224289:0 126379:0 284519:0 549329:0 750666:0 393772:0 586898:0 736887:0 48785:0 906517:0 229162:1 483485:1 739835:1 29957:1 694497:1 997508:1 556876:1 717791:1 232176:1 430356:1
+366182:0 82062:0 708883:0 949128:0 798964:0 639103:0 409033:0 79301:0 405607:0 342616:0 61552:1 560547:1 3760:1 754734:1 98496:1 472427:1 979596:1 750283:1 492028:1 801383:1
+969571:0 405187:0 756217:0 563640:0 572168:0 881952:0 446260:0 692177:0 994140:0 485393:0 509081:1 297377:1 465399:1 934708:1 430949:1 135651:1 484531:1 385306:1 463957:1 996004:1
+436320:0 423131:0 963969:0 78345:0 879550:0 458203:0 684397:0 956202:0 989802:0 526101:0 852446:1 182545:1 625656:1 674856:1 422648:1 74100:1 48372:1 850830:1 336087:1 178251:1
+242683:0 118677:0 20731:0 970617:0 355890:0 739613:0 926695:0 963639:0 201043:0 611907:0 115309:1 310984:1 615584:1 638886:1 575934:1 889389:1 974807:1 570987:1 532482:1 911925:1
+954007:0 122623:0 168195:0 348901:0 217880:0 84759:0 925763:0 436382:0 573742:0 942921:0 553377:1 835046:1 137907:1 933870:1 766585:1 48483:1 543079:1 889467:1 521705:1 906676:1
+798690:0 617323:0 553266:0 232924:0 159461:0 404822:0 52992:0 364854:0 913876:0 547974:0 559472:1 748595:1 71793:1 357331:1 606888:1 477051:1 291481:1 89363:1 503881:1 423029:1
+228207:0 785250:0 661149:0 803304:0 478781:0 495202:0 804509:0 273065:0 26123:0 810840:0 801871:1 146772:1 421009:1 752344:1 946358:1 531668:1 5771:1 191294:1 627329:1 434664:1
+984628:0 762075:0 505288:0 48519:0 72492:0 26568:0 684085:0 613095:0 781547:0 895829:0 280541:1 903234:1 708065:1 386658:1 331060:1 3693:1 279760:1 459579:1 423552:1 962594:1
+674172:0 39271:0 646093:0 757969:0 553251:0 734960:0 967186:0 856940:0 617246:0 376452:0 113050:1 472707:1 975057:1 865095:1 155824:1 389921:1 205520:1 513667:1 163588:1 953463:1
--- a/models/recall/multiview-simnet/data/train/train.txt
+++ b/models/recall/multiview-simnet/data/train/train.txt
+7688:0 589671:0 339543:0 681723:0 339204:0 743067:0 897959:0 897541:0 571340:0 858141:0 68161:1 533957:1 288065:1 755516:1 179906:1 324817:1 116293:1 942079:1 455277:1 787142:1 251765:2 846187:2 586960:2 781883:2 430436:2 240100:2 686201:2 632045:2 585097:2 61976:2
+187966:0 194147:0 640819:0 283848:0 514875:0 310781:0 760083:0 281096:0 837090:0 928087:0 958908:1 451359:1 456136:1 577231:1 373371:1 651928:1 877106:1 721988:1 342265:1 114942:1 668915:2 502190:2 139044:2 213045:2 36710:2 119509:2 450285:2 165440:2 199495:2 798870:2
+477955:0 598041:0 452166:0 924550:0 152308:0 316225:0 285239:0 7967:0 177143:0 132244:0 391070:1 169561:1 256279:1 563588:1 749753:1 237035:1 550804:1 736257:1 71551:1 61944:1 102132:2 484023:2 82995:2 732704:2 114816:2 413165:2 197504:2 686192:2 253734:2 248157:2
+325819:0 140241:0 365103:0 334185:0 357327:0 613836:0 928004:0 595589:0 506569:0 539067:0 638196:1 729129:1 730912:1 701797:1 571150:1 140054:1 680316:1 889784:1 302584:1 676284:1 671069:2 212989:2 318469:2 732930:2 924564:2 147041:2 572412:2 662673:2 418312:2 382855:2
+839803:0 888881:0 957998:0 906486:0 44377:0 247842:0 994783:0 813449:0 168271:0 493685:0 269703:1 156692:1 686681:1 273684:1 312387:1 462033:1 669631:1 635437:1 74337:1 217677:1 582194:2 992666:2 860610:2 660766:2 24524:2 169856:2 882211:2 291866:2 44494:2 984736:2
+327559:0 627497:0 876526:0 243959:0 532929:0 639919:0 443220:0 952110:0 844723:0 372053:0 196819:1 326005:1 62242:1 774928:1 382727:1 348680:1 946697:1 625998:1 276517:1 251595:1 342204:2 825871:2 407136:2 724114:2 611341:2 517978:2 248341:2 111254:2 836867:2 677297:2
+72451:0 749548:0 283413:0 419402:0 67446:0 341795:0 918120:0 892028:0 113151:0 832663:0 758121:1 500602:1 734935:1 577972:1 205421:1 726739:1 276563:1 611928:1 185486:1 603502:1 633117:2 929300:2 332435:2 216848:2 412769:2 708304:2 800045:2 315869:2 444476:2 332565:2
+675647:0 212558:0 654982:0 321053:0 111172:0 635432:0 298523:0 612182:0 203835:0 288250:0 990034:1 891786:1 188524:1 480757:1 436783:1 874434:1 530090:1 492441:1 32835:1 886415:1 688876:2 626030:2 612348:2 208265:2 355885:2 603938:2 349931:2 86683:2 361956:2 705130:2
+164500:0 332294:0 373155:0 320413:0 801561:0 152827:0 28282:0 435913:0 376758:0 367848:0 285596:1 282674:1 357323:1 257195:1 948061:1 996976:1 300918:1 734644:1 870559:1 924205:1 45095:2 61352:2 242258:2 153354:2 763576:2 133542:2 431079:2 193327:2 655823:2 770159:2
+821764:0 184731:0 888413:0 793536:0 30049:0 533675:0 791254:0 92255:0 74185:0 557758:0 795898:1 15689:1 983592:1 248891:1 64421:1 387642:1 315522:1 526054:1 404172:1 704838:1 537016:2 383828:2 438418:2 885895:2 894698:2 228867:2 343213:2 411377:2 149957:2 810795:2
--- a/models/recall/multiview-simnet/data_process.sh
+++ b/models/recall/multiview-simnet/data_process.sh
+#! /bin/bash
+#
+# Prepare the synthetic train/test samples for this model.
+# All paths are relative, so run this from the directory that contains
+# generate_synthetic_data.py.
+
+# Abort on the first failing command.
+set -e
+
+echo "begin to prepare data"
+
+# The generator opens data/train/train.txt and data/test/test.txt for
+# writing, so both directories have to exist beforehand.
+for subdir in train test; do
+    mkdir -p "data/${subdir}"
+done
+
+python generate_synthetic_data.py
+
--- a/models/recall/multiview-simnet/evaluate_reader.py
+++ b/models/recall/multiview-simnet/evaluate_reader.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import io
+import copy
+import random
+from fleetrec.core.reader import Reader
+from fleetrec.core.utils import envs
+
+
+class EvaluateReader(Reader):
+    """Parse ``feasign:slot`` text lines into per-slot id lists for evaluation.
+
+    Slot names are the decimal strings "0" .. "query_slots + title_slots - 1":
+    the query slots come first, followed by the title slots.
+    """
+
+    def init(self):
+        """Read the slot counts from the config and build the slot lookup table."""
+        self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
+        self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
+
+        # Query slots are numbered first, title slots continue the numbering.
+        slot_total = self.query_slots + self.title_slots
+        self.all_slots = [str(i) for i in range(slot_total)]
+
+        # slot name -> [seen-on-current-line flag, position in all_slots]
+        self._all_slots_dict = dict()
+        for index, slot in enumerate(self.all_slots):
+            self._all_slots_dict[slot] = [False, index]
+
+    def generate_sample(self, line):
+        """Return a generator function that yields the parsed sample for ``line``.
+
+        The sample is a list of ``(slot_name, [feasign, ...])`` tuples in
+        ``self.all_slots`` order. Elements whose slot is not configured are
+        skipped; a configured slot that does not occur on the line receives
+        the single padding id 0.
+        """
+
+        def data_iter():
+            padding = 0
+            output = [(slot, []) for slot in self.all_slots]
+            for elem in line.rstrip().split():
+                feasign, slot = elem.split(':')
+                # dict.has_key() was removed in Python 3; ``in`` works on 2 and 3.
+                if slot not in self._all_slots_dict:
+                    continue
+                self._all_slots_dict[slot][0] = True
+                index = self._all_slots_dict[slot][1]
+                output[index][1].append(int(feasign))
+            for state in self._all_slots_dict.values():
+                visit, index = state
+                if visit:
+                    # Reset the flag so the next line starts from a clean state.
+                    state[0] = False
+                else:
+                    output[index][1].append(padding)
+            yield output
+
+        return data_iter
--- a/models/recall/multiview-simnet/generate_synthetic_data.py
+++ b/models/recall/multiview-simnet/generate_synthetic_data.py
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+
+class Dataset:
+    """Base class with no state or behaviour of its own."""
+
+    def __init__(self):
+        pass
+
+
+class SyntheticDataset(Dataset):
+    """Random ``feasign:slot`` samples with query, positive-title and
+    (for training) negative-title slots.
+
+    Slot ids are consecutive: query slots start at 0, positive-title slots
+    follow, and negative-title slots come last. Every slot carries
+    ``ids_per_slot`` feature ids drawn uniformly from
+    ``[0, sparse_feature_dim - 1]``.
+    """
+
+    def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000):
+        # ids are randomly generated
+        self.ids_per_slot = 10
+        self.sparse_feature_dim = sparse_feature_dim
+        self.query_slot_num = query_slot_num
+        self.title_slot_num = title_slot_num
+        self.dataset_size = dataset_size
+
+    def _reader_creator(self, is_train):
+        """Return a generator function producing ``dataset_size`` samples.
+
+        Each sample is a flat list of ``"<feasign>:<slot id>"`` strings.
+        Negative-title slots are only emitted when ``is_train`` is true.
+        """
+
+        def make_slot(slot_id):
+            # One slot: ids_per_slot random feature ids tagged with slot_id.
+            return [
+                str(random.randint(0, self.sparse_feature_dim - 1)) + ':' + str(slot_id)
+                for _ in range(self.ids_per_slot)
+            ]
+
+        def reader():
+            # query + positive-title slots, plus negative-title slots for training
+            slot_total = self.query_slot_num + self.title_slot_num
+            if is_train:
+                slot_total += self.title_slot_num
+            for _ in range(self.dataset_size):
+                sample = []
+                for slot_id in range(slot_total):
+                    sample.extend(make_slot(slot_id))
+                yield sample
+
+        return reader
+
+    def train(self):
+        """Reader for training samples (includes negative-title slots)."""
+        return self._reader_creator(True)
+
+    def valid(self):
+        """Reader for validation samples; same layout as training samples."""
+        return self._reader_creator(True)
+
+    def test(self):
+        """Reader for test samples (query and positive-title slots only)."""
+        return self._reader_creator(False)
+
+if __name__ == '__main__':
+    # Same values as the hyper_parameters / slot counts used in config.yaml.
+    sparse_feature_dim = 1000001
+    query_slots = 1
+    title_slots = 1
+    dataset_size = 10
+    dataset = SyntheticDataset(sparse_feature_dim, query_slots, title_slots, dataset_size)
+
+    # The train file is written before the test file; the target directories
+    # must already exist because open() does not create them.
+    outputs = [
+        ("data/train/train.txt", dataset.train()),
+        ("data/test/test.txt", dataset.test()),
+    ]
+    for path, reader in outputs:
+        with open(path, 'w') as fout:
+            for data in reader():
+                # one sample per line, tokens separated by single spaces
+                fout.write(' '.join(data) + "\n")
--- a/models/recall/multiview-simnet/model.py
+++ b/models/recall/multiview-simnet/model.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import math
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+import paddle.fluid.layers.tensor as tensor
+import paddle.fluid.layers.control_flow as cf
+
+from fleetrec.core.utils import envs
+from fleetrec.core.model import Model as ModelBase
+
+class BowEncoder(object):
+    """Bag-of-words encoder: sum-pools an embedding sequence."""
+
+    def __init__(self):
+        # forward() passes no parameter names, so the prefix stays empty.
+        self.param_name = ""
+
+    def forward(self, emb):
+        """Return ``emb`` reduced with a 'sum' sequence pool."""
+        pooled = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+        return pooled
+
+
+class CNNEncoder(object):
+    """CNN encoder built on ``fluid.nets.sequence_conv_pool``."""
+
+    def __init__(self,
+                 param_name="cnn",
+                 win_size=3,
+                 ksize=128,
+                 act='tanh',
+                 pool_type='max'):
+        # param_name: prefix for the ".param" / ".bias" parameter names
+        # win_size:   forwarded as filter_size
+        # ksize:      forwarded as num_filters
+        # act / pool_type: forwarded unchanged
+        self.pool_type = pool_type
+        self.act = act
+        self.ksize = ksize
+        self.win_size = win_size
+        self.param_name = param_name
+
+    def forward(self, emb):
+        """Apply sequence convolution + pooling to ``emb`` and return the result."""
+        conv_pool_args = dict(
+            input=emb,
+            num_filters=self.ksize,
+            filter_size=self.win_size,
+            act=self.act,
+            pool_type=self.pool_type,
+            param_attr=self.param_name + ".param",
+            bias_attr=self.param_name + ".bias")
+        return fluid.nets.sequence_conv_pool(**conv_pool_args)
+
+
+class GrnnEncoder(object):
+    """GRU encoder: fc projection, dynamic GRU, then max pooling over the sequence."""
+
+    def __init__(self, param_name="grnn", hidden_size=128):
+        # param_name prefixes every parameter name created in forward().
+        self.hidden_size = hidden_size
+        self.param_name = param_name
+
+    def forward(self, emb):
+        """Encode ``emb`` and return the max-pooled GRU output."""
+        prefix = self.param_name
+
+        # The projection is three times as wide as the GRU hidden size.
+        gate_width = self.hidden_size * 3
+        projected = fluid.layers.fc(input=emb,
+                                    size=gate_width,
+                                    param_attr=prefix + "_fc.w",
+                                    bias_attr=False)
+
+        hidden_states = fluid.layers.dynamic_gru(
+            input=projected,
+            size=self.hidden_size,
+            is_reverse=False,
+            param_attr=prefix + ".param",
+            bias_attr=prefix + ".bias")
+
+        return fluid.layers.sequence_pool(input=hidden_states, pool_type='max')
+
+
+class SimpleEncoderFactory(object):
+    """Create an encoder object from its type name."""
+
+    def __init__(self):
+        pass
+
+    def create(self, enc_type, enc_hid_size):
+        """Create an encoder of the requested type.
+
+        Args:
+            enc_type: "bow", "cnn" or "gru".
+            enc_hid_size: passed as ``ksize`` to the CNN encoder and as
+                ``hidden_size`` to the GRU encoder; unused for "bow".
+
+        Returns:
+            A BowEncoder, CNNEncoder or GrnnEncoder instance.
+
+        Raises:
+            ValueError: if ``enc_type`` is none of the supported names.
+        """
+        if enc_type == "bow":
+            return BowEncoder()
+        if enc_type == "cnn":
+            return CNNEncoder(ksize=enc_hid_size)
+        if enc_type == "gru":
+            return GrnnEncoder(hidden_size=enc_hid_size)
+        # Fail here rather than returning None and crashing later on
+        # ``encoder.forward`` with an unrelated AttributeError.
+        raise ValueError(
+            "unsupported encoder type: %r (expected 'bow', 'cnn' or 'gru')" %
+            (enc_type, ))
+
+class Model(ModelBase):
+    """Multiview-simnet model.
+
+    Query, positive-title and negative-title slots are embedded, encoded per
+    slot, projected by fc layers and compared with cosine similarity. Training
+    uses a pairwise hinge loss; inference returns the query/title similarity.
+    """
+
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+        self.init_config()
+
+    def init_config(self):
+        """Read the hyper-parameters and create one encoder per slot."""
+        self._fetch_interval = 1
+        query_encoder = envs.get_global_env("hyper_parameters.query_encoder", None, self._namespace)
+        title_encoder = envs.get_global_env("hyper_parameters.title_encoder", None, self._namespace)
+        query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim", None, self._namespace)
+        title_encode_dim = envs.get_global_env("hyper_parameters.title_encode_dim", None, self._namespace)
+        query_slots = envs.get_global_env("hyper_parameters.query_slots", None, self._namespace)
+        title_slots = envs.get_global_env("hyper_parameters.title_slots", None, self._namespace)
+        factory = SimpleEncoderFactory()
+        self.query_encoders = [
+            factory.create(query_encoder, query_encode_dim)
+            for i in range(query_slots)
+        ]
+        self.title_encoders = [
+            factory.create(title_encoder, title_encode_dim)
+            for i in range(title_slots)
+        ]
+
+        self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
+        self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim", None, self._namespace)
+        self.emb_shape = [self.emb_size, self.emb_dim]
+        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
+        # margin of the pairwise hinge loss built in net()
+        self.margin = 0.1
+
+    def input(self, is_train=True):
+        """Declare the input variables and return them as one list.
+
+        Variables are named by slot id: query slots first, then positive-title
+        slots, then (only when ``is_train`` is true) negative-title slots.
+        """
+        self.q_slots = [
+            fluid.data(
+                name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
+            for i in range(len(self.query_encoders))
+        ]
+        self.pt_slots = [
+            fluid.data(
+                name="%d" % (i + len(self.query_encoders)), shape=[None, 1], lod_level=1, dtype='int64')
+            for i in range(len(self.title_encoders))
+        ]
+
+        if not is_train:
+            return self.q_slots + self.pt_slots
+
+        self.nt_slots = [
+            fluid.data(
+                name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1, dtype='int64')
+            for i in range(len(self.title_encoders))
+        ]
+
+        return self.q_slots + self.pt_slots + self.nt_slots
+
+    def train_input(self):
+        """Create the training inputs and, where required, their DataLoader."""
+        res = self.input()
+        self._data_var = res
+
+        use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
+
+        # A DataLoader is created off LINUX, or when the config asks for one.
+        if self._platform != "LINUX" or use_dataloader:
+            self._data_loader = fluid.io.DataLoader.from_generator(
+                feed_list=self._data_var, capacity=256, use_double_buffer=False, iterable=False)
+
+    def get_acc(self, x, y):
+        """Return the fraction of rows for which ``x < y``."""
+        less = tensor.cast(cf.less_than(x, y), dtype='float32')
+        # A column of ones with the batch size of x; its sum is the row count.
+        label_ones = fluid.layers.fill_constant_batch_size_like(
+            input=x, dtype='float32', shape=[-1, 1], value=1.0)
+        correct = fluid.layers.reduce_sum(less)
+        total = fluid.layers.reduce_sum(label_ones)
+        acc = fluid.layers.elementwise_div(correct, total)
+        return acc
+
+    def net(self):
+        """Build the training network; sets ``self.avg_cost`` and ``self.acc``."""
+        # lookup embedding for each slot (all slots share the "emb" table)
+        q_embs = [
+            fluid.embedding(
+                input=query, size=self.emb_shape, param_attr="emb")
+            for query in self.q_slots
+        ]
+        pt_embs = [
+            fluid.embedding(
+                input=title, size=self.emb_shape, param_attr="emb")
+            for title in self.pt_slots
+        ]
+        nt_embs = [
+            fluid.embedding(
+                input=title, size=self.emb_shape, param_attr="emb")
+            for title in self.nt_slots
+        ]
+
+        # encode each embedding field with encoder
+        q_encodes = [
+            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
+        ]
+        pt_encodes = [
+            self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
+        ]
+        nt_encodes = [
+            self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
+        ]
+
+        # concat multi view for query, pos_title, neg_title
+        # NOTE(review): concat is called with its default axis; with more than
+        # one slot per view confirm that this is the intended axis.
+        q_concat = fluid.layers.concat(q_encodes)
+        pt_concat = fluid.layers.concat(pt_encodes)
+        nt_concat = fluid.layers.concat(nt_encodes)
+
+        # projection of hidden layer (both title branches share t_fc.*)
+        q_hid = fluid.layers.fc(q_concat,
+                                size=self.hidden_size,
+                                param_attr='q_fc.w',
+                                bias_attr='q_fc.b')
+        pt_hid = fluid.layers.fc(pt_concat,
+                                 size=self.hidden_size,
+                                 param_attr='t_fc.w',
+                                 bias_attr='t_fc.b')
+        nt_hid = fluid.layers.fc(nt_concat,
+                                 size=self.hidden_size,
+                                 param_attr='t_fc.w',
+                                 bias_attr='t_fc.b')
+
+        # cosine of hidden layers
+        cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
+        cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
+
+        # pairwise hinge_loss: max(0, margin - cos_pos + cos_neg)
+        loss_part1 = fluid.layers.elementwise_sub(
+            tensor.fill_constant_batch_size_like(
+                input=cos_pos,
+                shape=[-1, 1],
+                value=self.margin,
+                dtype='float32'),
+            cos_pos)
+
+        loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
+
+        loss_part3 = fluid.layers.elementwise_max(
+            tensor.fill_constant_batch_size_like(
+                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+            loss_part2)
+
+        self.avg_cost = fluid.layers.mean(loss_part3)
+        # share of pairs whose negative similarity is below the positive one
+        self.acc = self.get_acc(cos_neg, cos_pos)
+
+    def avg_loss(self):
+        """Expose the mean hinge loss as the training cost."""
+        self._cost = self.avg_cost
+
+    def metrics(self):
+        """Register the "loss" and "acc" metrics."""
+        self._metrics["loss"] = self.avg_cost
+        self._metrics["acc"] = self.acc
+
+    def train_net(self):
+        """Build inputs, network, cost and metrics for training."""
+        self.train_input()
+        self.net()
+        self.avg_loss()
+        self.metrics()
+
+    def optimizer(self):
+        """Return an Adam optimizer using the configured learning rate."""
+        learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
+        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
+        return optimizer
+
+    def infer_input(self):
+        """Create the inference inputs (no negative titles) and their DataLoader."""
+        res = self.input(is_train=False)
+        self._infer_data_var = res
+
+        self._infer_data_loader = fluid.io.DataLoader.from_generator(
+            feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
+
+    def infer_net(self):
+        """Build the inference network; stores the result as 'query_pt_sim'."""
+        self.infer_input()
+        # lookup embedding for each slot
+        q_embs = [
+            fluid.embedding(
+                input=query, size=self.emb_shape, param_attr="emb")
+            for query in self.q_slots
+        ]
+        pt_embs = [
+            fluid.embedding(
+                input=title, size=self.emb_shape, param_attr="emb")
+            for title in self.pt_slots
+        ]
+        # encode each embedding field with encoder
+        q_encodes = [
+            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
+        ]
+        pt_encodes = [
+            self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
+        ]
+        # concat multi view for query, pos_title
+        q_concat = fluid.layers.concat(q_encodes)
+        pt_concat = fluid.layers.concat(pt_encodes)
+        # projection of hidden layer
+        q_hid = fluid.layers.fc(q_concat,
+                                size=self.hidden_size,
+                                param_attr='q_fc.w',
+                                bias_attr='q_fc.b')
+        pt_hid = fluid.layers.fc(pt_concat,
+                                 size=self.hidden_size,
+                                 param_attr='t_fc.w',
+                                 bias_attr='t_fc.b')
+
+        # cosine of hidden layers
+        cos = fluid.layers.cos_sim(q_hid, pt_hid)
+        self._infer_results['query_pt_sim'] = cos
--- a/models/recall/multiview-simnet/reader.py
+++ b/models/recall/multiview-simnet/reader.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import io
+import copy
+import random
+from fleetrec.core.reader import Reader
+from fleetrec.core.utils import envs
+
+
+class TrainReader(Reader):
+    """Parse ``feasign:slot`` text lines into per-slot id lists for training.
+
+    Slot names are the decimal strings "0" .. "query_slots + 2 * title_slots - 1":
+    the query slots come first, followed by two groups of ``title_slots`` slots
+    (presumably positive then negative titles -- confirm against the model inputs).
+    """
+
+    def init(self):
+        """Read the slot counts from the config and build the slot lookup table."""
+        self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
+        self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
+
+        # Query slots are numbered first; the two title groups continue the numbering.
+        slot_total = self.query_slots + 2 * self.title_slots
+        self.all_slots = [str(i) for i in range(slot_total)]
+
+        # slot name -> [seen-on-current-line flag, position in all_slots]
+        self._all_slots_dict = dict()
+        for index, slot in enumerate(self.all_slots):
+            self._all_slots_dict[slot] = [False, index]
+
+    def generate_sample(self, line):
+        """Return a generator function that yields the parsed sample for ``line``.
+
+        The sample is a list of ``(slot_name, [feasign, ...])`` tuples in
+        ``self.all_slots`` order. Elements whose slot is not configured are
+        skipped; a configured slot that does not occur on the line receives
+        the single padding id 0.
+        """
+
+        def data_iter():
+            padding = 0
+            output = [(slot, []) for slot in self.all_slots]
+            for elem in line.rstrip().split():
+                feasign, slot = elem.split(':')
+                # dict.has_key() was removed in Python 3; ``in`` works on 2 and 3.
+                if slot not in self._all_slots_dict:
+                    continue
+                self._all_slots_dict[slot][0] = True
+                index = self._all_slots_dict[slot][1]
+                output[index][1].append(int(feasign))
+            for state in self._all_slots_dict.values():
+                visit, index = state
+                if visit:
+                    # Reset the flag so the next line starts from a clean state.
+                    state[0] = False
+                else:
+                    output[index][1].append(padding)
+            yield output
+
+        return data_iter