diff --git a/doc/imgs/overview.png b/doc/imgs/overview.png
index b9d5a172f8c8443c69e338e819fe454f28206ea0..83341cb3b96a257117f07e452993911277823f80 100644
Binary files a/doc/imgs/overview.png and b/doc/imgs/overview.png differ
diff --git a/models/multitask/esmm/config.yaml b/models/multitask/esmm/config.yaml
index f40b967c1c02175debd44bfdc15a6d48c4208de6..b1412515d4c751d0980eb128601cb08066562b41 100644
--- a/models/multitask/esmm/config.yaml
+++ b/models/multitask/esmm/config.yaml
@@ -12,40 +12,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-evaluate:
- reader:
- batch_size: 1
- class: "{workspace}/esmm_infer_reader.py"
- test_data_path: "{workspace}/data/train"
-train:
- trainer:
- # for cluster training
- strategy: "async"
+workspace: "paddlerec.models.multitask.esmm"
- epochs: 3
- workspace: "paddlerec.models.multitask.esmm"
- device: cpu
+dataset:
+- name: dataset_train
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/esmm_reader.py"
+- name: dataset_infer
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/test"
+ data_converter: "{workspace}/esmm_reader.py"
- reader:
- batch_size: 2
- class: "{workspace}/esmm_reader.py"
- train_data_path: "{workspace}/data/train"
+hyper_parameters:
+ vocab_size: 10000
+ embed_size: 128
+ optimizer:
+ class: adam
+ learning_rate: 0.001
+ strategy: async
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- vocab_size: 10000
- embed_size: 128
- learning_rate: 0.001
- optimizer: adam
+# to run inference, switch mode to infer_runner and adjust the 'phase' section below
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+ class: single_train
+ device: cpu
+ epochs: 3
+ save_checkpoint_interval: 2
+ save_inference_interval: 4
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 10
+- name: infer_runner
+ class: single_infer
+ init_model_path: "increment/0"
+ device: cpu
+ epochs: 3
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: train
+ model: "{workspace}/model.py"
+ dataset_name: dataset_train
+ thread_num: 1
+ #- name: infer
+ # model: "{workspace}/model.py"
+ # dataset_name: dataset_infer
+ # thread_num: 1
diff --git a/models/multitask/esmm/data/train/small.csv b/models/multitask/esmm/data/train/small.txt
similarity index 100%
rename from models/multitask/esmm/data/train/small.csv
rename to models/multitask/esmm/data/train/small.txt
diff --git a/models/multitask/esmm/esmm_infer_reader.py b/models/multitask/esmm/esmm_infer_reader.py
deleted file mode 100644
index 70e3e989df611419f378a8920b499e42690d1cae..0000000000000000000000000000000000000000
--- a/models/multitask/esmm/esmm_infer_reader.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-
-from collections import defaultdict
-
-from paddlerec.core.reader import Reader
-
-
-class EvaluateReader(Reader):
- def init(self):
- all_field_id = [
- '101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124',
- '125', '126', '127', '128', '129', '205', '206', '207', '210',
- '216', '508', '509', '702', '853', '301'
- ]
- self.all_field_id_dict = defaultdict(int)
- for i, field_id in enumerate(all_field_id):
- self.all_field_id_dict[field_id] = [False, i]
-
- def generate_sample(self, line):
- """
- Read the data line by line and process it as a dictionary
- """
-
- def reader():
- """
- This function needs to be implemented by the user, based on data format
- """
- features = line.strip().split(',')
- ctr = int(features[1])
- cvr = int(features[2])
-
- padding = 0
- output = [(field_id, []) for field_id in self.all_field_id_dict]
-
- for elem in features[4:]:
- field_id, feat_id = elem.strip().split(':')
- if field_id not in self.all_field_id_dict:
- continue
- self.all_field_id_dict[field_id][0] = True
- index = self.all_field_id_dict[field_id][1]
- output[index][1].append(int(feat_id))
-
- for field_id in self.all_field_id_dict:
- visited, index = self.all_field_id_dict[field_id]
- if visited:
- self.all_field_id_dict[field_id][0] = False
- else:
- output[index][1].append(padding)
- output.append(('ctr', [ctr]))
- output.append(('cvr', [cvr]))
- yield output
-
- return reader
diff --git a/models/multitask/esmm/esmm_reader.py b/models/multitask/esmm/esmm_reader.py
index 036e146ee923b6feda6398c7dcd49486eac51c50..5a3f3f916e1395a05b2f59a98132e5220dd224b9 100644
--- a/models/multitask/esmm/esmm_reader.py
+++ b/models/multitask/esmm/esmm_reader.py
@@ -40,8 +40,6 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format
"""
features = line.strip().split(',')
- # ctr = list(map(int, features[1]))
- # cvr = list(map(int, features[2]))
ctr = int(features[1])
cvr = int(features[2])
@@ -54,7 +52,6 @@ class TrainReader(Reader):
continue
self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1]
- # feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict:
diff --git a/models/multitask/esmm/model.py b/models/multitask/esmm/model.py
index 71c6539579504407a22f3174407b517f9d9a55b5..b4b257ed8a74829d3619c3b07bbb0cfc8e69ddde 100644
--- a/models/multitask/esmm/model.py
+++ b/models/multitask/esmm/model.py
@@ -23,28 +23,11 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def fc(self, tag, data, out_dim, active='prelu'):
+ def _init_hyper_parameters(self):
+ self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size")
+ self.embed_size = envs.get_global_env("hyper_parameters.embed_size")
- init_stddev = 1.0
- scales = 1.0 / np.sqrt(data.shape[1])
-
- p_attr = fluid.param_attr.ParamAttr(
- name='%s_weight' % tag,
- initializer=fluid.initializer.NormalInitializer(
- loc=0.0, scale=init_stddev * scales))
-
- b_attr = fluid.ParamAttr(
- name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
-
- out = fluid.layers.fc(input=data,
- size=out_dim,
- act=active,
- param_attr=p_attr,
- bias_attr=b_attr,
- name=tag)
- return out
-
- def input_data(self):
+ def input_data(self, is_infer=False, **kwargs):
sparse_input_ids = [
fluid.data(
name="field_" + str(i),
@@ -55,26 +38,24 @@ class Model(ModelBase):
label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
inputs = sparse_input_ids + [label_ctr] + [label_cvr]
- self._data_var.extend(inputs)
-
- return inputs
+        return inputs
def net(self, inputs, is_infer=False):
- vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
- self._namespace)
- embed_size = envs.get_global_env("hyper_parameters.embed_size", None,
- self._namespace)
emb = []
+        # embed each sparse input field (the last two inputs are the labels)
for data in inputs[0:-2]:
feat_emb = fluid.embedding(
input=data,
- size=[vocab_size, embed_size],
+ size=[self.vocab_size, self.embed_size],
param_attr=fluid.ParamAttr(
name='dis_emb',
learning_rate=5,
initializer=fluid.initializer.Xavier(
- fan_in=embed_size, fan_out=embed_size)),
+ fan_in=self.embed_size, fan_out=self.embed_size)),
is_sparse=True)
field_emb = fluid.layers.sequence_pool(
input=feat_emb, pool_type='sum')
@@ -83,14 +64,14 @@ class Model(ModelBase):
# ctr
active = 'relu'
- ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active)
- ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active)
- ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax')
+ ctr_fc1 = self._fc('ctr_fc1', concat_emb, 200, active)
+ ctr_fc2 = self._fc('ctr_fc2', ctr_fc1, 80, active)
+ ctr_out = self._fc('ctr_out', ctr_fc2, 2, 'softmax')
# cvr
- cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
- cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
- cvr_out = self.fc('cvr_out', cvr_fc2, 2, 'softmax')
+ cvr_fc1 = self._fc('cvr_fc1', concat_emb, 200, active)
+ cvr_fc2 = self._fc('cvr_fc2', cvr_fc1, 80, active)
+ cvr_out = self._fc('cvr_out', cvr_fc2, 2, 'softmax')
ctr_clk = inputs[-2]
ctcvr_buy = inputs[-1]
@@ -127,15 +108,23 @@ class Model(ModelBase):
self._metrics["AUC_ctcvr"] = auc_ctcvr
self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr
- def train_net(self):
- input_data = self.input_data()
- self.net(input_data)
-
- def infer_net(self):
- self._infer_data_var = self.input_data()
- self._infer_data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._infer_data_var,
- capacity=64,
- use_double_buffer=False,
- iterable=False)
- self.net(self._infer_data_var, is_infer=True)
+ def _fc(self, tag, data, out_dim, active='prelu'):
+
+ init_stddev = 1.0
+ scales = 1.0 / np.sqrt(data.shape[1])
+
+ p_attr = fluid.param_attr.ParamAttr(
+ name='%s_weight' % tag,
+ initializer=fluid.initializer.NormalInitializer(
+ loc=0.0, scale=init_stddev * scales))
+
+ b_attr = fluid.ParamAttr(
+ name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
+
+ out = fluid.layers.fc(input=data,
+ size=out_dim,
+ act=active,
+ param_attr=p_attr,
+ bias_attr=b_attr,
+ name=tag)
+ return out
diff --git a/models/multitask/mmoe/census_infer_reader.py b/models/multitask/mmoe/census_infer_reader.py
deleted file mode 100644
index fada3990fdcc756a2938c5a4fd763f022dda53c4..0000000000000000000000000000000000000000
--- a/models/multitask/mmoe/census_infer_reader.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from paddlerec.core.reader import Reader
-
-
-class EvaluateReader(Reader):
- def init(self):
-
- pass
-
- def generate_sample(self, line):
- """
- Read the data line by line and process it as a dictionary
- """
-
- def reader():
- """
- This function needs to be implemented by the user, based on data format
- """
- l = line.strip().split(',')
- l = list(map(float, l))
- label_income = []
- label_marital = []
- data = l[2:]
- if int(l[1]) == 0:
- label_income = [1, 0]
- elif int(l[1]) == 1:
- label_income = [0, 1]
- if int(l[0]) == 0:
- label_marital = [1, 0]
- elif int(l[0]) == 1:
- label_marital = [0, 1]
- feature_name = ["input", "label_income", "label_marital"]
- yield zip(feature_name, [data] + [label_income] + [label_marital])
-
- return reader
diff --git a/models/multitask/mmoe/config.yaml b/models/multitask/mmoe/config.yaml
index e23332cda298cf0f9fd0d35b19f8fe8feb34a9b1..9f36f84991ea30ffeb1745bc2d769b19a9887ab2 100644
--- a/models/multitask/mmoe/config.yaml
+++ b/models/multitask/mmoe/config.yaml
@@ -12,43 +12,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-evaluate:
- reader:
- batch_size: 1
- class: "{workspace}/census_infer_reader.py"
- test_data_path: "{workspace}/data/train"
+workspace: "paddlerec.models.multitask.mmoe"
-train:
- trainer:
- # for cluster training
- strategy: "async"
+dataset:
+- name: dataset_train
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/census_reader.py"
+- name: dataset_infer
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/census_reader.py"
- epochs: 3
- workspace: "paddlerec.models.multitask.mmoe"
- device: cpu
+hyper_parameters:
+ feature_size: 499
+ expert_num: 8
+ gate_num: 2
+ expert_size: 16
+ tower_size: 8
+ optimizer:
+ class: adam
+ learning_rate: 0.001
+ strategy: async
- reader:
- batch_size: 1
- class: "{workspace}/census_reader.py"
- train_data_path: "{workspace}/data/train"
+# to run inference, switch mode to infer_runner and adjust the 'phase' section below
+mode: train_runner
+#mode: infer_runner
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- feature_size: 499
- expert_num: 8
- gate_num: 2
- expert_size: 16
- tower_size: 8
- learning_rate: 0.001
- optimizer: adam
+runner:
+- name: train_runner
+ class: single_train
+ device: cpu
+ epochs: 3
+ save_checkpoint_interval: 2
+ save_inference_interval: 4
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 10
+- name: infer_runner
+ class: single_infer
+ init_model_path: "increment/0"
+ device: cpu
+ epochs: 3
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: train
+ model: "{workspace}/model.py"
+ dataset_name: dataset_train
+ thread_num: 1
+ #- name: infer
+ # model: "{workspace}/model.py"
+ # dataset_name: dataset_infer
+ # thread_num: 1
diff --git a/models/multitask/mmoe/data/run.sh b/models/multitask/mmoe/data/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b60d42b37057593b1c16aa5fd91b8217a5a71bbf
--- /dev/null
+++ b/models/multitask/mmoe/data/run.sh
@@ -0,0 +1,16 @@
+mkdir train_data
+mkdir test_data
+mkdir data
+train_path="data/census-income.data"
+test_path="data/census-income.test"
+train_data_path="train_data/"
+test_data_path="test_data/"
+pip install -r requirements.txt
+
+wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
+tar -zxvf data/census.tar.gz -C data/
+
+python data_preparation.py --train_path ${train_path} \
+ --test_path ${test_path} \
+ --train_data_path ${train_data_path}\
+ --test_data_path ${test_data_path}
diff --git a/models/multitask/mmoe/data/train/train_data.txt b/models/multitask/mmoe/data/train/train_data.txt
index 992314e443942c1b3e08a7db88bf2c1d7354c451..ba385736663d5efd4321692d1fbafda8bbf585c1 100644
--- a/models/multitask/mmoe/data/train/train_data.txt
+++ b/models/multitask/mmoe/data/train/train_data.txt
@@ -1,4 +1,24 @@
0,0,73,0,0,0,0,1700.09,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,73,0,0,0,0,1700.09,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,73,0,0,0,0,1700.09,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,58,0,0,0,0,1053.55,1,0,2,52,94,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,18,0,0,0,0,991.95,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
+1,0,9,0,0,0,0,1758.14,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,10,0,0,0,0,1069.16,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,48,1200,0,0,0,162.61,1,2,2,52,95,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,42,0,5178,0,0,1535.86,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,28,0,0,0,0,898.83,4,0,2,30,95,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,47,876,0,0,0,1661.53,5,0,2,52,95,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,34,0,0,0,0,1146.79,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,58,0,0,0,0,1053.55,1,0,2,52,94,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,18,0,0,0,0,991.95,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
+1,0,9,0,0,0,0,1758.14,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,10,0,0,0,0,1069.16,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,48,1200,0,0,0,162.61,1,2,2,52,95,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,42,0,5178,0,0,1535.86,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+1,0,28,0,0,0,0,898.83,4,0,2,30,95,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,47,876,0,0,0,1661.53,5,0,2,52,95,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
+0,0,34,0,0,0,0,1146.79,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,58,0,0,0,0,1053.55,1,0,2,52,94,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
1,0,18,0,0,0,0,991.95,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
1,0,9,0,0,0,0,1758.14,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
diff --git a/models/multitask/mmoe/model.py b/models/multitask/mmoe/model.py
index 035733690f46960906c902dbe240603acd136565..309da6a31e8754110fb8c9d50971bc4dc9aff364 100644
--- a/models/multitask/mmoe/model.py
+++ b/models/multitask/mmoe/model.py
@@ -22,53 +22,51 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def MMOE(self, is_infer=False):
- feature_size = envs.get_global_env("hyper_parameters.feature_size",
- None, self._namespace)
- expert_num = envs.get_global_env("hyper_parameters.expert_num", None,
- self._namespace)
- gate_num = envs.get_global_env("hyper_parameters.gate_num", None,
- self._namespace)
- expert_size = envs.get_global_env("hyper_parameters.expert_size", None,
- self._namespace)
- tower_size = envs.get_global_env("hyper_parameters.tower_size", None,
- self._namespace)
-
- input_data = fluid.data(
- name="input", shape=[-1, feature_size], dtype="float32")
+ def _init_hyper_parameters(self):
+ self.feature_size = envs.get_global_env(
+ "hyper_parameters.feature_size")
+ self.expert_num = envs.get_global_env("hyper_parameters.expert_num")
+ self.gate_num = envs.get_global_env("hyper_parameters.gate_num")
+ self.expert_size = envs.get_global_env("hyper_parameters.expert_size")
+ self.tower_size = envs.get_global_env("hyper_parameters.tower_size")
+
+ def input_data(self, is_infer=False, **kwargs):
+ inputs = fluid.data(
+ name="input", shape=[-1, self.feature_size], dtype="float32")
label_income = fluid.data(
name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
label_marital = fluid.data(
name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
-        if is_infer:
- self._infer_data_var = [input_data, label_income, label_marital]
- self._infer_data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._infer_data_var,
- capacity=64,
- use_double_buffer=False,
- iterable=False)
-
- self._data_var.extend([input_data, label_income, label_marital])
+        return [inputs, label_income, label_marital]
+
+ def net(self, inputs, is_infer=False):
+ input_data = inputs[0]
+ label_income = inputs[1]
+ label_marital = inputs[2]
+
# f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
expert_outputs = []
- for i in range(0, expert_num):
+ for i in range(0, self.expert_num):
expert_output = fluid.layers.fc(
input=input_data,
- size=expert_size,
+ size=self.expert_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='expert_' + str(i))
expert_outputs.append(expert_output)
expert_concat = fluid.layers.concat(expert_outputs, axis=1)
- expert_concat = fluid.layers.reshape(expert_concat,
- [-1, expert_num, expert_size])
+ expert_concat = fluid.layers.reshape(
+ expert_concat, [-1, self.expert_num, self.expert_size])
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
output_layers = []
- for i in range(0, gate_num):
+ for i in range(0, self.gate_num):
cur_gate = fluid.layers.fc(
input=input_data,
- size=expert_num,
+ size=self.expert_num,
act='softmax',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='gate_' + str(i))
@@ -78,7 +76,7 @@ class Model(ModelBase):
cur_gate_expert = fluid.layers.reduce_sum(cur_gate_expert, dim=1)
# Build tower layer
cur_tower = fluid.layers.fc(input=cur_gate_expert,
- size=tower_size,
+ size=self.tower_size,
act='relu',
name='task_layer_' + str(i))
out = fluid.layers.fc(input=cur_tower,
@@ -127,8 +125,5 @@ class Model(ModelBase):
self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2
- def train_net(self):
- self.MMOE()
-
def infer_net(self):
- self.MMOE(is_infer=True)
+ pass
diff --git a/models/multitask/readme.md b/models/multitask/readme.md
index 10e0641060f74b67b4987d14a1c4aad27a25b103..07a6c01d77b72ed47153c3fad92521429a4769a2 100755
--- a/models/multitask/readme.md
+++ b/models/multitask/readme.md
@@ -9,7 +9,9 @@
 * [Overview](#整体介绍)
     * [Multitask model list](#多任务模型列表)
 * [Tutorial](#使用教程)
-    * [Training & prediction](#训练&预测)
+    * [Data processing](#数据处理)
+    * [Training](#训练)
+    * [Prediction](#预测)
 * [Benchmarks](#效果对比)
     * [Model benchmark list](#模型效果列表)
@@ -40,14 +42,49 @@
-## Tutorial
-### Training & prediction
+## Tutorial (quick start)
```shell
python -m paddlerec.run -m paddlerec.models.multitask.mmoe # mmoe
python -m paddlerec.run -m paddlerec.models.multitask.share-bottom # share-bottom
python -m paddlerec.run -m paddlerec.models.multitask.esmm # esmm
```
+## Tutorial (reproducing the paper)
+### Note
+To let users run every model quickly, each model directory ships with sample data, and hyperparameters such as batch_size are tuned so that the training & test logs display well on that sample data. To reproduce the results in this readme, adjust batch_size and the other hyperparameters according to the table below (see the config sketch after it), and use the provided scripts to download and preprocess the corresponding datasets.
+
+| Model | batch_size | thread_num | epoch_num |
+| :------------------: | :--------------------: | :--------------------: | :--------------------: |
+| Share-Bottom | 32 | 1 | 400 |
+| MMoE | 32 | 1 | 400 |
+| ESMM | 64 | 2 | 100 |
+
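+As a minimal sketch (using the MMoE row above; the keys mirror the config.yaml layout introduced in this PR), these knobs map into config.yaml like so:
+
+```yaml
+dataset:
+- name: dataset_train
+  batch_size: 32               # 1 in the sample config; 32 to reproduce the paper
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/census_reader.py"
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 400                  # 3 in the sample config
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1                # 2 for ESMM per the table
+```
+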
+### Data processing
+Run the data download & preprocessing script under each model's data directory:
+
+```
+sh run.sh
+```
+
+### Training
+```
+cd models/multitask/mmoe # enter the directory of the chosen multitask model, MMoE as an example
+python -m paddlerec.run -m ./config.yaml # after customizing hyperparameters, point to this config file to use your own settings
+```
+
+### Prediction
+```
+# in the model's config.yaml, set workspace to the absolute path of the current directory
+# in the model's config.yaml, set mode to infer_runner
+# example: mode: train_runner -> mode: infer_runner
+# in infer_runner, set class to: class: single_infer
+# switch the phase section to the infer configuration, following the comments in config.yaml
+
+# after editing config.yaml, run:
+python -m paddlerec.run -m ./config.yaml # MMoE as an example
+```
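+
+After these edits, the relevant config.yaml fragment would look roughly like the sketch below (assembled from the keys this PR introduces; init_model_path must point at a checkpoint already saved by the train runner):
+
+```yaml
+mode: infer_runner
+
+runner:
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"   # written via train_runner's save_checkpoint_path
+  device: cpu
+
+phase:
+- name: infer
+  model: "{workspace}/model.py"
+  dataset_name: dataset_infer
+  thread_num: 1
+```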
+
+
 ## Benchmarks
 ### Model benchmark list
diff --git a/models/multitask/share-bottom/census_infer_reader.py b/models/multitask/share-bottom/census_infer_reader.py
deleted file mode 100644
index c62de8e69ce6ccfbb4df1e1252d9630a84fc56b3..0000000000000000000000000000000000000000
--- a/models/multitask/share-bottom/census_infer_reader.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from paddlerec.core.reader import Reader
-
-
-class EvaluateReader(Reader):
- def init(self):
- pass
-
- def generate_sample(self, line):
- """
- Read the data line by line and process it as a dictionary
- """
-
- def reader():
- """
- This function needs to be implemented by the user, based on data format
- """
- l = line.strip().split(',')
- l = list(map(float, l))
- label_income = []
- label_marital = []
- data = l[2:]
- if int(l[1]) == 0:
- label_income = [1, 0]
- elif int(l[1]) == 1:
- label_income = [0, 1]
- if int(l[0]) == 0:
- label_marital = [1, 0]
- elif int(l[0]) == 1:
- label_marital = [0, 1]
- feature_name = ["input", "label_income", "label_marital"]
- yield zip(feature_name, [data] + [label_income] + [label_marital])
-
- return reader
diff --git a/models/multitask/share-bottom/config.yaml b/models/multitask/share-bottom/config.yaml
index 591b6800cca0f44b2e1503caabe21c32fee771bd..3a44b8e7b23a545e5daf67a789a0c3537f614c4e 100644
--- a/models/multitask/share-bottom/config.yaml
+++ b/models/multitask/share-bottom/config.yaml
@@ -12,42 +12,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-evaluate:
- reader:
- batch_size: 1
- class: "{workspace}/census_infer_reader.py"
- test_data_path: "{workspace}/data/train"
+workspace: "paddlerec.models.multitask.share-bottom"
-train:
- trainer:
- # for cluster training
- strategy: "async"
+dataset:
+- name: dataset_train
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/census_reader.py"
+- name: dataset_infer
+ batch_size: 1
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/census_reader.py"
- epochs: 3
- workspace: "paddlerec.models.multitask.share-bottom"
- device: cpu
+hyper_parameters:
+ feature_size: 499
+ bottom_size: 117
+ tower_nums: 2
+ tower_size: 8
+ optimizer:
+ class: adam
+ learning_rate: 0.001
+ strategy: async
- reader:
- batch_size: 2
- class: "{workspace}/census_reader.py"
- train_data_path: "{workspace}/data/train"
+# to run inference, switch mode to infer_runner and adjust the 'phase' section below
+mode: train_runner
+#mode: infer_runner
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- feature_size: 499
- bottom_size: 117
- tower_nums: 2
- tower_size: 8
- learning_rate: 0.001
- optimizer: adam
+runner:
+- name: train_runner
+ class: single_train
+ device: cpu
+ epochs: 3
+ save_checkpoint_interval: 2
+ save_inference_interval: 4
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 5
+- name: infer_runner
+ class: single_infer
+ init_model_path: "increment/0"
+ device: cpu
+ epochs: 3
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: train
+ model: "{workspace}/model.py"
+ dataset_name: dataset_train
+ thread_num: 1
+ #- name: infer
+ # model: "{workspace}/model.py"
+ # dataset_name: dataset_infer
+ # thread_num: 1
diff --git a/models/multitask/share-bottom/model.py b/models/multitask/share-bottom/model.py
index f19ecbe1c43323e30cb9a44eb281f31c68b69909..0275d3a10b3dd4f35388da10b303d86421228695 100644
--- a/models/multitask/share-bottom/model.py
+++ b/models/multitask/share-bottom/model.py
@@ -22,46 +22,42 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def model(self, is_infer=False):
-
- feature_size = envs.get_global_env("hyper_parameters.feature_size",
- None, self._namespace)
- bottom_size = envs.get_global_env("hyper_parameters.bottom_size", None,
- self._namespace)
- tower_size = envs.get_global_env("hyper_parameters.tower_size", None,
- self._namespace)
- tower_nums = envs.get_global_env("hyper_parameters.tower_nums", None,
- self._namespace)
-
- input_data = fluid.data(
- name="input", shape=[-1, feature_size], dtype="float32")
+ def _init_hyper_parameters(self):
+ self.feature_size = envs.get_global_env(
+ "hyper_parameters.feature_size")
+ self.bottom_size = envs.get_global_env("hyper_parameters.bottom_size")
+ self.tower_size = envs.get_global_env("hyper_parameters.tower_size")
+ self.tower_nums = envs.get_global_env("hyper_parameters.tower_nums")
+
+ def input_data(self, is_infer=False, **kwargs):
+ inputs = fluid.data(
+ name="input", shape=[-1, self.feature_size], dtype="float32")
label_income = fluid.data(
name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
label_marital = fluid.data(
name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
-
-        if is_infer:
- self._infer_data_var = [input_data, label_income, label_marital]
- self._infer_data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._infer_data_var,
- capacity=64,
- use_double_buffer=False,
- iterable=False)
+        return [inputs, label_income, label_marital]
- self._data_var.extend([input_data, label_income, label_marital])
+ def net(self, inputs, is_infer=False):
+ input_data = inputs[0]
+ label_income = inputs[1]
+ label_marital = inputs[2]
bottom_output = fluid.layers.fc(
input=input_data,
- size=bottom_size,
+ size=self.bottom_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='bottom_output')
# Build tower layer from bottom layer
output_layers = []
- for index in range(tower_nums):
+ for index in range(self.tower_nums):
tower_layer = fluid.layers.fc(input=bottom_output,
- size=tower_size,
+ size=self.tower_size,
act='relu',
name='task_layer_' + str(index))
output_layer = fluid.layers.fc(input=tower_layer,
@@ -107,9 +103,3 @@ class Model(ModelBase):
self._metrics["BATCH_AUC_income"] = batch_auc_1
self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2
-
- def train_net(self):
- self.model()
-
- def infer_net(self):
- self.model(is_infer=True)
diff --git a/models/rank/dcn/config.yaml b/models/rank/dcn/config.yaml
index 58c88f0cfed18e2dbbb19c9a097dbe9b6d61c814..390b460a84d9e212867d372c6fd542c0f1f2b478 100755
--- a/models/rank/dcn/config.yaml
+++ b/models/rank/dcn/config.yaml
@@ -12,43 +12,66 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-train:
- trainer:
- # for cluster training
- strategy: "async"
-
- epochs: 10
- workspace: "paddlerec.models.rank.dcn"
-
- reader:
- batch_size: 2
- train_data_path: "{workspace}/data/sample_data/train"
- feat_dict_name: "{workspace}/data/vocab"
+
+# global settings
+debug: false
+workspace: "paddlerec.models.rank.dcn"
+
+dataset:
+ - name: train_sample
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
+ sparse_slots: "label C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26"
+ dense_slots: "I1:1 I2:1 I3:1 I4:1 I5:1 I6:1 I7:1 I8:1 I9:1 I10:1 I11:1 I12:1 I13:1"
+ - name: infer_sample
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/infer"
sparse_slots: "label C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26"
dense_slots: "I1:1 I2:1 I3:1 I4:1 I5:1 I6:1 I7:1 I8:1 I9:1 I10:1 I11:1 I12:1 I13:1"
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- cross_num: 2
- dnn_hidden_units: [128, 128]
- l2_reg_cross: 0.00005
- dnn_use_bn: False
- clip_by_norm: 100.0
- cat_feat_num: "{workspace}/data/sample_data/cat_feature_num.txt"
- is_sparse: False
- is_test: False
- num_field: 39
- learning_rate: 0.0001
- act: "relu"
- optimizer: adam
-
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+hyper_parameters:
+ optimizer:
+ class: Adam
+ learning_rate: 0.0001
+    # user-defined settings
+ cross_num: 2
+ dnn_hidden_units: [128, 128]
+ l2_reg_cross: 0.00005
+ dnn_use_bn: False
+ clip_by_norm: 100.0
+ cat_feat_num: "{workspace}/data/sample_data/cat_feature_num.txt"
+ is_sparse: False
+
+
+mode: train_runner
+# to run inference, change mode to "infer_runner" and switch phase to "infer_phase"
+
+runner:
+ - name: train_runner
+ trainer_class: single_train
+ epochs: 1
+ device: cpu
+ init_model_path: ""
+ save_checkpoint_interval: 1
+ save_inference_interval: 1
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 1
+ - name: infer_runner
+ trainer_class: single_infer
+ epochs: 1
+ device: cpu
+ init_model_path: "increment/0"
+ print_interval: 1
+
+phase:
+- name: phase1
+ model: "{workspace}/model.py"
+ dataset_name: train_sample
+ thread_num: 1
+#- name: infer_phase
+# model: "{workspace}/model.py"
+# dataset_name: infer_sample
+# thread_num: 1
diff --git a/models/rank/dcn/data/sample_data/infer/infer_sample_data b/models/rank/dcn/data/sample_data/infer/infer_sample_data
new file mode 100644
index 0000000000000000000000000000000000000000..4aa6d249feecf542a5ce947f510bded60aa6414f
--- /dev/null
+++ b/models/rank/dcn/data/sample_data/infer/infer_sample_data
@@ -0,0 +1,10 @@
+label:0 I1:0.69314718056 I2:1.60943791243 I3:1.79175946923 I4:0.0 I5:7.23201033166 I6:1.60943791243 I7:2.77258872224 I8:1.09861228867 I9:5.20400668708 I10:0.69314718056 I11:1.09861228867 I12:0 I13:1.09861228867 C1:95 C2:398 C3:0 C4:0 C5:53 C6:1 C7:73 C8:71 C9:3 C10:1974 C11:832 C12:0 C13:875 C14:8 C15:1764 C16:0 C17:5 C18:390 C19:226 C20:1 C21:0 C22:0 C23:8 C24:1759 C25:1 C26:862
+label:0 I1:1.09861228867 I2:1.38629436112 I3:3.80666248977 I4:0.69314718056 I5:4.63472898823 I6:2.19722457734 I7:1.09861228867 I8:1.09861228867 I9:1.60943791243 I10:0.69314718056 I11:0.69314718056 I12:0 I13:1.60943791243 C1:95 C2:200 C3:1184 C4:1929 C5:53 C6:4 C7:1477 C8:2 C9:3 C10:1283 C11:1567 C12:1048 C13:271 C14:6 C15:1551 C16:899 C17:1 C18:162 C19:226 C20:2 C21:575 C22:0 C23:8 C24:1615 C25:1 C26:659
+label:0 I1:1.09861228867 I2:1.38629436112 I3:0.69314718056 I4:2.7080502011 I5:6.64378973315 I6:4.49980967033 I7:1.60943791243 I8:1.09861228867 I9:5.50533153593 I10:0.69314718056 I11:1.38629436112 I12:1.38629436112 I13:3.82864139649 C1:123 C2:378 C3:991 C4:197 C5:53 C6:1 C7:689 C8:2 C9:3 C10:245 C11:623 C12:1482 C13:887 C14:21 C15:106 C16:720 C17:3 C18:768 C19:0 C20:0 C21:1010 C22:1 C23:8 C24:720 C25:0 C26:0
+label:0 I1:0 I2:6.79905586206 I3:0 I4:0 I5:8.38776764398 I6:0 I7:0.0 I8:0.0 I9:0.0 I10:0 I11:0.0 I12:0 I13:0 C1:95 C2:227 C3:0 C4:219 C5:53 C6:4 C7:3174 C8:2 C9:3 C10:569 C11:1963 C12:0 C13:1150 C14:21 C15:1656 C16:0 C17:6 C18:584 C19:0 C20:0 C21:0 C22:0 C23:8 C24:954 C25:0 C26:0
+label:0 I1:1.38629436112 I2:1.09861228867 I3:0 I4:0.0 I5:1.09861228867 I6:0.0 I7:1.38629436112 I8:0.0 I9:0.0 I10:0.69314718056 I11:0.69314718056 I12:0 I13:0.0 C1:121 C2:147 C3:0 C4:1356 C5:53 C6:7 C7:2120 C8:2 C9:3 C10:703 C11:1678 C12:1210 C13:1455 C14:8 C15:538 C16:1276 C17:6 C18:346 C19:0 C20:0 C21:944 C22:0 C23:10 C24:355 C25:0 C26:0
+label:0 I1:0 I2:1.09861228867 I3:0 I4:0 I5:9.45915167004 I6:0 I7:0.0 I8:0.0 I9:1.94591014906 I10:0 I11:0.0 I12:0 I13:0 C1:14 C2:75 C3:993 C4:480 C5:50 C6:6 C7:1188 C8:2 C9:3 C10:245 C11:1037 C12:1365 C13:1421 C14:21 C15:786 C16:5 C17:2 C18:555 C19:0 C20:0 C21:1408 C22:6 C23:7 C24:753 C25:0 C26:0
+label:0 I1:0 I2:1.60943791243 I3:1.09861228867 I4:0 I5:8.06117135969 I6:0 I7:0.0 I8:0.69314718056 I9:1.09861228867 I10:0 I11:0.0 I12:0 I13:0 C1:139 C2:343 C3:553 C4:828 C5:50 C6:4 C7:0 C8:2 C9:3 C10:245 C11:2081 C12:260 C13:455 C14:21 C15:122 C16:1159 C17:2 C18:612 C19:0 C20:0 C21:1137 C22:0 C23:1 C24:1583 C25:0 C26:0
+label:1 I1:0.69314718056 I2:2.07944154168 I3:1.09861228867 I4:0.0 I5:0.0 I6:0.0 I7:0.69314718056 I8:0.0 I9:0.0 I10:0.69314718056 I11:0.69314718056 I12:0 I13:0.0 C1:95 C2:227 C3:0 C4:1567 C5:21 C6:7 C7:2496 C8:71 C9:3 C10:1913 C11:2212 C12:0 C13:673 C14:21 C15:1656 C16:0 C17:5 C18:584 C19:0 C20:0 C21:0 C22:0 C23:10 C24:954 C25:0 C26:0
+label:0 I1:0 I2:3.87120101091 I3:1.60943791243 I4:2.19722457734 I5:9.85277303799 I6:5.52146091786 I7:3.36729582999 I8:3.4657359028 I9:4.9558270576 I10:0 I11:0.69314718056 I12:0 I13:2.19722457734 C1:14 C2:14 C3:454 C4:197 C5:53 C6:1 C7:1386 C8:2 C9:3 C10:0 C11:1979 C12:205 C13:214 C14:6 C15:1837 C16:638 C17:5 C18:6 C19:0 C20:0 C21:70 C22:0 C23:10 C24:720 C25:0 C26:0
+label:0 I1:0 I2:3.66356164613 I3:0 I4:0.69314718056 I5:10.4263800775 I6:3.09104245336 I7:0.69314718056 I8:1.09861228867 I9:1.38629436112 I10:0 I11:0.69314718056 I12:0 I13:0.69314718056 C1:14 C2:179 C3:120 C4:746 C5:53 C6:0 C7:1312 C8:2 C9:3 C10:1337 C11:1963 C12:905 C13:1150 C14:21 C15:1820 C16:328 C17:9 C18:77 C19:0 C20:0 C21:311 C22:0 C23:10 C24:89 C25:0 C26:0
diff --git a/models/rank/dcn/model.py b/models/rank/dcn/model.py
index 89113a315284845f094857a879d70156956d3065..52764c3e2122c408078c65875427af74c4ae83da 100755
--- a/models/rank/dcn/model.py
+++ b/models/rank/dcn/model.py
@@ -24,44 +24,21 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def init_network(self):
+ def _init_hyper_parameters(self):
self.cross_num = envs.get_global_env("hyper_parameters.cross_num",
- None, self._namespace)
+ None)
self.dnn_hidden_units = envs.get_global_env(
- "hyper_parameters.dnn_hidden_units", None, self._namespace)
+ "hyper_parameters.dnn_hidden_units", None)
self.l2_reg_cross = envs.get_global_env(
- "hyper_parameters.l2_reg_cross", None, self._namespace)
+ "hyper_parameters.l2_reg_cross", None)
self.dnn_use_bn = envs.get_global_env("hyper_parameters.dnn_use_bn",
- None, self._namespace)
+ None)
self.clip_by_norm = envs.get_global_env(
- "hyper_parameters.clip_by_norm", None, self._namespace)
- cat_feat_num = envs.get_global_env("hyper_parameters.cat_feat_num",
- None, self._namespace)
-
- self.sparse_inputs = self._sparse_data_var[1:]
- self.dense_inputs = self._dense_data_var
- self.target_input = self._sparse_data_var[0]
-
- cat_feat_dims_dict = OrderedDict()
- for line in open(cat_feat_num):
- spls = line.strip().split()
- assert len(spls) == 2
- cat_feat_dims_dict[spls[0]] = int(spls[1])
- self.cat_feat_dims_dict = cat_feat_dims_dict if cat_feat_dims_dict else OrderedDict(
- )
+ "hyper_parameters.clip_by_norm", None)
+ self.cat_feat_num = envs.get_global_env(
+ "hyper_parameters.cat_feat_num", None)
self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
- None, self._namespace)
-
- self.dense_feat_names = [i.name for i in self.dense_inputs]
- self.sparse_feat_names = [i.name for i in self.sparse_inputs]
-
- # {feat_name: dims}
- self.feat_dims_dict = OrderedDict(
- [(feat_name, 1) for feat_name in self.dense_feat_names])
- self.feat_dims_dict.update(self.cat_feat_dims_dict)
-
- self.net_input = None
- self.loss = None
+ None)
def _create_embedding_input(self):
# sparse embedding
@@ -121,9 +98,29 @@ class Model(ModelBase):
def _l2_loss(self, w):
return fluid.layers.reduce_sum(fluid.layers.square(w))
- def train_net(self):
- self._init_slots()
- self.init_network()
+ def net(self, inputs, is_infer=False):
+ self.sparse_inputs = self._sparse_data_var[1:]
+ self.dense_inputs = self._dense_data_var
+ self.target_input = self._sparse_data_var[0]
+
+ cat_feat_dims_dict = OrderedDict()
+ for line in open(self.cat_feat_num):
+ spls = line.strip().split()
+ assert len(spls) == 2
+ cat_feat_dims_dict[spls[0]] = int(spls[1])
+ self.cat_feat_dims_dict = cat_feat_dims_dict if cat_feat_dims_dict else OrderedDict(
+ )
+
+ self.dense_feat_names = [i.name for i in self.dense_inputs]
+ self.sparse_feat_names = [i.name for i in self.sparse_inputs]
+
+ # {feat_name: dims}
+ self.feat_dims_dict = OrderedDict(
+ [(feat_name, 1) for feat_name in self.dense_feat_names])
+ self.feat_dims_dict.update(self.cat_feat_dims_dict)
+
+ self.net_input = None
+ self.loss = None
self.net_input = self._create_embedding_input()
@@ -146,6 +143,9 @@ class Model(ModelBase):
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
+ if is_infer:
+ self._infer_results["AUC"] = auc_var
+
# logloss
logloss = fluid.layers.log_loss(
self.prob, fluid.layers.cast(
@@ -157,11 +157,7 @@ class Model(ModelBase):
self.loss = self.avg_logloss + l2_reg_cross_loss
self._cost = self.loss
- def optimizer(self):
- learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
- None, self._namespace)
- optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
- return optimizer
-
- def infer_net(self):
- self.train_net()
+ #def optimizer(self):
+ #
+ # optimizer = fluid.optimizer.Adam(self.learning_rate, lazy_mode=True)
+ # return optimizer
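The model refactor above consistently drops the trailing `self._namespace` argument from `envs.get_global_env`, because the converted configs are flat. A minimal illustration of that lookup contract, with a plain dict standing in for the real `paddlerec.core.utils.envs` (keys and values here are illustrative only):

```python
# Stand-in for paddlerec.core.utils.envs, illustrative only: the new
# configs are addressed by flat dotted keys, so no namespace is needed.
_GLOBAL_ENV = {
    "hyper_parameters.cross_num": 2,
    "hyper_parameters.dnn_hidden_units": [128, 128],
}

def get_global_env(key, default=None):
    # Old call: get_global_env("hyper_parameters.cross_num", None, namespace)
    # New call: the full key is spelled out; defaults still apply.
    return _GLOBAL_ENV.get(key, default)

print(get_global_env("hyper_parameters.cross_num"))          # 2
print(get_global_env("hyper_parameters.l2_reg_cross", 0.0))  # 0.0 (default)
```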
diff --git a/models/rank/deepfm/config.yaml b/models/rank/deepfm/config.yaml
index 956b65b0c13f9242e8c84156dcfc535cf7fffae7..d1d25c2c4c05c82f8ee0b9554563d2f310c2ac01 100755
--- a/models/rank/deepfm/config.yaml
+++ b/models/rank/deepfm/config.yaml
@@ -12,39 +12,65 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-train:
- trainer:
- # for cluster training
- strategy: "async"
-
- epochs: 10
- workspace: "paddlerec.models.rank.deepfm"
-
- reader:
- batch_size: 2
- train_data_path: "{workspace}/data/sample_data/train"
- feat_dict_name: "{workspace}/data/sample_data/feat_dict_10.pkl2"
+# global settings
+debug: false
+workspace: "paddlerec.models.rank.deepfm"
+
+
+dataset:
+ - name: train_sample
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
+ sparse_slots: "label feat_idx"
+ dense_slots: "feat_value:39"
+ - name: infer_sample
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
sparse_slots: "label feat_idx"
dense_slots: "feat_value:39"
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- sparse_feature_number: 1086460
- sparse_feature_dim: 9
- num_field: 39
- fc_sizes: [400, 400, 400]
- learning_rate: 0.0001
- reg: 0.001
- act: "relu"
- optimizer: SGD
-
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+hyper_parameters:
+ optimizer:
+ class: SGD
+ learning_rate: 0.0001
+ sparse_feature_number: 1086460
+ sparse_feature_dim: 9
+ num_field: 39
+ fc_sizes: [400, 400, 400]
+ reg: 0.001
+ act: "relu"
+
+
+mode: train_runner
+# if infer, change mode to "infer_runner" and change phase to "infer_phase"
+
+runner:
+ - name: train_runner
+ trainer_class: single_train
+ epochs: 2
+ device: cpu
+ init_model_path: ""
+ save_checkpoint_interval: 1
+ save_inference_interval: 1
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 1
+ - name: infer_runner
+ trainer_class: single_infer
+ epochs: 1
+ device: cpu
+ init_model_path: "increment/0"
+ print_interval: 1
+
+
+phase:
+- name: phase1
+ model: "{workspace}/model.py"
+ dataset_name: train_sample
+ thread_num: 1
+#- name: infer_phase
+# model: "{workspace}/model.py"
+# dataset_name: infer_sample
+# thread_num: 1
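Under the new layout, `mode` names one entry of the `runner` list and each phase names a dataset. A small sketch of how that indirection is assumed to resolve (simplified; the real wiring lives in the paddlerec trainers, not in this snippet):

```python
# Simplified sketch, not the trainer's actual code: resolve the active
# runner from a converted config dict by matching `mode` to a runner name.
config = {
    "mode": "train_runner",
    "runner": [
        {"name": "train_runner", "trainer_class": "single_train", "epochs": 2},
        {"name": "infer_runner", "trainer_class": "single_infer", "epochs": 1},
    ],
    "phase": [{"name": "phase1", "dataset_name": "train_sample"}],
}

def select_runner(cfg):
    # `mode` must match exactly one runner's `name`.
    return next(r for r in cfg["runner"] if r["name"] == cfg["mode"])

print(select_runner(config)["trainer_class"])  # single_train
```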
diff --git a/models/rank/deepfm/model.py b/models/rank/deepfm/model.py
index deb63c40f2aecc9ee6469f34338d858b09daf067..8ac8df134d08550c0db06e9aacbad21dbd74cfe9 100755
--- a/models/rank/deepfm/model.py
+++ b/models/rank/deepfm/model.py
@@ -24,42 +24,46 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def deepfm_net(self):
+ def _init_hyper_parameters(self):
+ self.sparse_feature_number = envs.get_global_env(
+ "hyper_parameters.sparse_feature_number", None)
+ self.sparse_feature_dim = envs.get_global_env(
+ "hyper_parameters.sparse_feature_dim", None)
+ self.num_field = envs.get_global_env("hyper_parameters.num_field",
+ None)
+ self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
+ self.layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes",
+ None)
+ self.act = envs.get_global_env("hyper_parameters.act", None)
+
+ def net(self, inputs, is_infer=False):
init_value_ = 0.1
is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
- sparse_feature_number = envs.get_global_env(
- "hyper_parameters.sparse_feature_number", None, self._namespace)
- sparse_feature_dim = envs.get_global_env(
- "hyper_parameters.sparse_feature_dim", None, self._namespace)
# ------------------------- network input --------------------------
- num_field = envs.get_global_env("hyper_parameters.num_field", None,
- self._namespace)
-
raw_feat_idx = self._sparse_data_var[1]
raw_feat_value = self._dense_data_var[0]
self.label = self._sparse_data_var[0]
feat_idx = raw_feat_idx
feat_value = fluid.layers.reshape(
- raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
+ raw_feat_value, [-1, self.num_field, 1]) # None * num_field * 1
- reg = envs.get_global_env("hyper_parameters.reg", 1e-4,
- self._namespace)
first_weights_re = fluid.embedding(
input=feat_idx,
is_sparse=True,
is_distributed=is_distributed,
dtype='float32',
- size=[sparse_feature_number + 1, 1],
+ size=[self.sparse_feature_number + 1, 1],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_),
- regularizer=fluid.regularizer.L1DecayRegularizer(reg)))
+ regularizer=fluid.regularizer.L1DecayRegularizer(self.reg)))
first_weights = fluid.layers.reshape(
- first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1
+ first_weights_re,
+ shape=[-1, self.num_field, 1]) # None * num_field * 1
y_first_order = fluid.layers.reduce_sum((first_weights * feat_value),
1)
@@ -70,16 +74,17 @@ class Model(ModelBase):
is_sparse=True,
is_distributed=is_distributed,
dtype='float32',
- size=[sparse_feature_number + 1, sparse_feature_dim],
+ size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0,
- scale=init_value_ / math.sqrt(float(sparse_feature_dim)))))
+ scale=init_value_ /
+ math.sqrt(float(self.sparse_feature_dim)))))
feat_embeddings = fluid.layers.reshape(
feat_embeddings_re,
- shape=[-1, num_field,
- sparse_feature_dim]) # None * num_field * embedding_size
+ shape=[-1, self.num_field, self.sparse_feature_dim
+ ]) # None * num_field * embedding_size
feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
# sum_square part
@@ -101,17 +106,13 @@ class Model(ModelBase):
# ------------------------- DNN --------------------------
- layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None,
- self._namespace)
- act = envs.get_global_env("hyper_parameters.act", None,
- self._namespace)
- y_dnn = fluid.layers.reshape(feat_embeddings,
- [-1, num_field * sparse_feature_dim])
- for s in layer_sizes:
+ y_dnn = fluid.layers.reshape(
+ feat_embeddings, [-1, self.num_field * self.sparse_feature_dim])
+ for s in self.layer_sizes:
y_dnn = fluid.layers.fc(
input=y_dnn,
size=s,
- act=act,
+ act=self.act,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_ / math.sqrt(float(10)))),
@@ -133,21 +134,12 @@ class Model(ModelBase):
self.predict = fluid.layers.sigmoid(y_first_order + y_second_order +
y_dnn)
-
- def train_net(self):
- self._init_slots()
- self.deepfm_net()
-
- # ------------------------- Cost(logloss) --------------------------
-
cost = fluid.layers.log_loss(
input=self.predict, label=fluid.layers.cast(self.label, "float32"))
avg_cost = fluid.layers.reduce_sum(cost)
self._cost = avg_cost
- # ------------------------- Metric(Auc) --------------------------
-
predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
label_int = fluid.layers.cast(self.label, 'int64')
auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
@@ -155,12 +147,5 @@ class Model(ModelBase):
slide_steps=0)
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
-
- def optimizer(self):
- learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
- None, self._namespace)
- optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
- return optimizer
-
- def infer_net(self):
- self.train_net()
+ if is_infer:
+ self._infer_results["AUC"] = auc_var
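For reference, the cost that `net()` now computes inline is the summed log loss over the batch. Restated in plain Python for a toy batch (illustrative arithmetic only; the epsilon clip below is this sketch's choice, not Paddle's default):

```python
import math

# Toy restatement of fluid.layers.log_loss followed by reduce_sum.
def log_loss(pred, label, eps=1e-7):
    pred = min(max(pred, eps), 1.0 - eps)  # clip to avoid log(0)
    return -(label * math.log(pred) + (1 - label) * math.log(1 - pred))

preds, labels = [0.9, 0.2, 0.6], [1, 0, 1]
print(sum(log_loss(p, y) for p, y in zip(preds, labels)))  # ~0.84
```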
diff --git a/models/rank/din/config.yaml b/models/rank/din/config.yaml
index bdf56b825e54bfbe161fa5e711d7401a25e3b8ee..2885ba7a58083be470d9bc2f8d2d030c2c3207b5 100755
--- a/models/rank/din/config.yaml
+++ b/models/rank/din/config.yaml
@@ -12,40 +12,60 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-train:
- trainer:
- # for cluster training
- strategy: "async"
+# global settings
+debug: false
+workspace: "paddlerec.models.rank.din"
- epochs: 10
- workspace: "paddlerec.models.rank.din"
+dataset:
+ - name: sample_1
+ type: DataLoader
+ batch_size: 5
+ data_path: "{workspace}/data/train_data"
+ data_converter: "{workspace}/reader.py"
+ - name: infer_sample
+ type: DataLoader
+ batch_size: 5
+ data_path: "{workspace}/data/train_data"
+ data_converter: "{workspace}/reader.py"
- reader:
- batch_size: 2
- class: "{workspace}/reader.py"
- train_data_path: "{workspace}/data/train_data"
- dataset_class: "DataLoader"
+hyper_parameters:
+ optimizer:
+ class: SGD
+ learning_rate: 0.0001
+ use_DataLoader: True
+ item_emb_size: 64
+ cat_emb_size: 64
+ is_sparse: False
+ item_count: 63001
+ cat_count: 801
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- use_DataLoader: True
- item_emb_size: 64
- cat_emb_size: 64
- is_sparse: False
- config_path: "data/config.txt"
- fc_sizes: [400, 400, 400]
- learning_rate: 0.0001
- reg: 0.001
- act: "sigmoid"
- optimizer: SGD
+ act: "sigmoid"
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+
+mode: train_runner
+
+runner:
+ - name: train_runner
+ trainer_class: single_train
+ epochs: 1
+ device: cpu
+ init_model_path: ""
+ save_checkpoint_interval: 1
+ save_inference_interval: 1
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 1
+ - name: infer_runner
+ trainer_class: single_infer
+ epochs: 1
+ device: cpu
+ init_model_path: "increment/0"
+phase:
+- name: phase1
+ model: "{workspace}/model.py"
+ dataset_name: sample_1
+ thread_num: 1
+#- name: infer_phase
+# model: "{workspace}/model.py"
+# dataset_name: infer_sample
+# thread_num: 1
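All converted configs refer to model files and data through the `{workspace}` placeholder. How it expands is internal to paddlerec; the snippet below only illustrates the substitution pattern, and the variable names are hypothetical:

```python
# Hypothetical illustration of the "{workspace}" substitution used by the
# converted configs; the real logic lives inside paddlerec, not here.
workspace = "paddlerec.models.rank.din"
data_path = "{workspace}/data/train_data".replace("{workspace}", workspace)
print(data_path)  # paddlerec.models.rank.din/data/train_data
```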
diff --git a/models/rank/din/model.py b/models/rank/din/model.py
index c2acbe66b6c704655cf9a5aff86d583233672f6c..4f6099119fae745b3b0c975ddcef853d3dce35b8 100755
--- a/models/rank/din/model.py
+++ b/models/rank/din/model.py
@@ -22,12 +22,58 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
- def config_read(self, config_path):
- with open(config_path, "r") as fin:
- user_count = int(fin.readline().strip())
- item_count = int(fin.readline().strip())
- cat_count = int(fin.readline().strip())
- return user_count, item_count, cat_count
+ def _init_hyper_parameters(self):
+ self.item_emb_size = envs.get_global_env(
+ "hyper_parameters.item_emb_size", 64)
+ self.cat_emb_size = envs.get_global_env(
+ "hyper_parameters.cat_emb_size", 64)
+ self.act = envs.get_global_env("hyper_parameters.act", "sigmoid")
+ self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
+ False)
+ #significant for speeding up the training process
+ self.use_DataLoader = envs.get_global_env(
+ "hyper_parameters.use_DataLoader", False)
+ self.item_count = envs.get_global_env("hyper_parameters.item_count",
+ 63001)
+ self.cat_count = envs.get_global_env("hyper_parameters.cat_count", 801)
+
+ def input_data(self, is_infer=False, **kwargs):
+ seq_len = -1
+ self.data_var = []
+ hist_item_seq = fluid.data(
+ name="hist_item_seq", shape=[None, seq_len], dtype="int64")
+ self.data_var.append(hist_item_seq)
+
+ hist_cat_seq = fluid.data(
+ name="hist_cat_seq", shape=[None, seq_len], dtype="int64")
+ self.data_var.append(hist_cat_seq)
+
+ target_item = fluid.data(
+ name="target_item", shape=[None], dtype="int64")
+ self.data_var.append(target_item)
+
+ target_cat = fluid.data(name="target_cat", shape=[None], dtype="int64")
+ self.data_var.append(target_cat)
+
+ label = fluid.data(name="label", shape=[None, 1], dtype="float32")
+ self.data_var.append(label)
+
+ mask = fluid.data(
+ name="mask", shape=[None, seq_len, 1], dtype="float32")
+ self.data_var.append(mask)
+
+ target_item_seq = fluid.data(
+ name="target_item_seq", shape=[None, seq_len], dtype="int64")
+ self.data_var.append(target_item_seq)
+
+ target_cat_seq = fluid.data(
+ name="target_cat_seq", shape=[None, seq_len], dtype="int64")
+ self.data_var.append(target_cat_seq)
+
+ train_inputs = [hist_item_seq] + [hist_cat_seq] + [target_item] + [
+ target_cat
+ ] + [label] + [mask] + [target_item_seq] + [target_cat_seq]
+ return train_inputs
def din_attention(self, hist, target_expand, mask):
"""activation weight"""
@@ -59,104 +105,58 @@ class Model(ModelBase):
out = fluid.layers.reshape(x=out, shape=[0, hidden_size])
return out
- def train_net(self):
- seq_len = -1
- self.item_emb_size = envs.get_global_env(
- "hyper_parameters.item_emb_size", 64, self._namespace)
- self.cat_emb_size = envs.get_global_env(
- "hyper_parameters.cat_emb_size", 64, self._namespace)
- self.act = envs.get_global_env("hyper_parameters.act", "sigmoid",
- self._namespace)
- #item_emb_size = 64
- #cat_emb_size = 64
- self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
- False, self._namespace)
- #significant for speeding up the training process
- self.config_path = envs.get_global_env(
- "hyper_parameters.config_path", "data/config.txt", self._namespace)
- self.use_DataLoader = envs.get_global_env(
- "hyper_parameters.use_DataLoader", False, self._namespace)
- user_count, item_count, cat_count = self.config_read(self.config_path)
+ def net(self, inputs, is_infer=False):
+ hist_item_seq = inputs[0]
+ hist_cat_seq = inputs[1]
+ target_item = inputs[2]
+ target_cat = inputs[3]
+ label = inputs[4]
+ mask = inputs[5]
+ target_item_seq = inputs[6]
+ target_cat_seq = inputs[7]
item_emb_attr = fluid.ParamAttr(name="item_emb")
cat_emb_attr = fluid.ParamAttr(name="cat_emb")
- hist_item_seq = fluid.data(
- name="hist_item_seq", shape=[None, seq_len], dtype="int64")
- self._data_var.append(hist_item_seq)
-
- hist_cat_seq = fluid.data(
- name="hist_cat_seq", shape=[None, seq_len], dtype="int64")
- self._data_var.append(hist_cat_seq)
-
- target_item = fluid.data(
- name="target_item", shape=[None], dtype="int64")
- self._data_var.append(target_item)
-
- target_cat = fluid.data(name="target_cat", shape=[None], dtype="int64")
- self._data_var.append(target_cat)
-
- label = fluid.data(name="label", shape=[None, 1], dtype="float32")
- self._data_var.append(label)
-
- mask = fluid.data(
- name="mask", shape=[None, seq_len, 1], dtype="float32")
- self._data_var.append(mask)
-
- target_item_seq = fluid.data(
- name="target_item_seq", shape=[None, seq_len], dtype="int64")
- self._data_var.append(target_item_seq)
-
- target_cat_seq = fluid.data(
- name="target_cat_seq", shape=[None, seq_len], dtype="int64")
- self._data_var.append(target_cat_seq)
-
- if self.use_DataLoader:
- self._data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._data_var,
- capacity=10000,
- use_double_buffer=False,
- iterable=False)
-
hist_item_emb = fluid.embedding(
input=hist_item_seq,
- size=[item_count, self.item_emb_size],
+ size=[self.item_count, self.item_emb_size],
param_attr=item_emb_attr,
is_sparse=self.is_sparse)
hist_cat_emb = fluid.embedding(
input=hist_cat_seq,
- size=[cat_count, self.cat_emb_size],
+ size=[self.cat_count, self.cat_emb_size],
param_attr=cat_emb_attr,
is_sparse=self.is_sparse)
target_item_emb = fluid.embedding(
input=target_item,
- size=[item_count, self.item_emb_size],
+ size=[self.item_count, self.item_emb_size],
param_attr=item_emb_attr,
is_sparse=self.is_sparse)
target_cat_emb = fluid.embedding(
input=target_cat,
- size=[cat_count, self.cat_emb_size],
+ size=[self.cat_count, self.cat_emb_size],
param_attr=cat_emb_attr,
is_sparse=self.is_sparse)
target_item_seq_emb = fluid.embedding(
input=target_item_seq,
- size=[item_count, self.item_emb_size],
+ size=[self.item_count, self.item_emb_size],
param_attr=item_emb_attr,
is_sparse=self.is_sparse)
target_cat_seq_emb = fluid.embedding(
input=target_cat_seq,
- size=[cat_count, self.cat_emb_size],
+ size=[self.cat_count, self.cat_emb_size],
param_attr=cat_emb_attr,
is_sparse=self.is_sparse)
item_b = fluid.embedding(
input=target_item,
- size=[item_count, 1],
+ size=[self.item_count, 1],
param_attr=fluid.initializer.Constant(value=0.0))
hist_seq_concat = fluid.layers.concat(
@@ -195,12 +195,5 @@ class Model(ModelBase):
slide_steps=0)
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
-
- def optimizer(self):
- learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
- None, self._namespace)
- optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
- return optimizer
-
- def infer_net(self, parameter_list):
- self.deepfm_net()
+ if is_infer:
+ self._infer_results["AUC"] = auc_var
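With the split into `input_data()` and `net()`, the positional unpacking at the top of `net()` must mirror the order in which `input_data()` returned the variables. A tiny contract check, with plain strings standing in for the `fluid.data` variables of the real model (sketch only):

```python
# Sketch: net() receives exactly what input_data() returned, in order.
def input_data():
    return ["hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
            "label", "mask", "target_item_seq", "target_cat_seq"]

def net(inputs):
    (hist_item_seq, hist_cat_seq, target_item, target_cat,
     label, mask, target_item_seq, target_cat_seq) = inputs
    return label

assert net(input_data()) == "label"  # an order mismatch would surface here
```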
diff --git a/models/rank/din/reader.py b/models/rank/din/reader.py
index aba06141da6c60beb81ea446d3c7e7dc8a731df9..90d358b9f8122fd396bb1a6eb37cbb4d03b96143 100755
--- a/models/rank/din/reader.py
+++ b/models/rank/din/reader.py
@@ -29,8 +29,8 @@ from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
- self.train_data_path = envs.get_global_env("train_data_path", None,
- "train.reader")
+ self.train_data_path = envs.get_global_env(
+ "dataset.sample_1.data_path", None)
self.res = []
self.max_len = 0
@@ -46,7 +46,8 @@ class TrainReader(Reader):
fo = open("tmp.txt", "w")
fo.write(str(self.max_len))
fo.close()
- self.batch_size = envs.get_global_env("batch_size", 32, "train.reader")
+ self.batch_size = envs.get_global_env("dataset.sample_1.batch_size",
+ 32)
self.group_size = self.batch_size * 20
def _process_line(self, line):
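The reader now addresses its settings with flat keys like `dataset.sample_1.batch_size`. A sketch of how such a key is assumed to resolve against the new `dataset` list (simplified stand-in, not the envs implementation):

```python
# Simplified stand-in for the flat dataset lookup, illustrative only.
cfg = {"dataset": [{"name": "sample_1", "batch_size": 5},
                   {"name": "infer_sample", "batch_size": 5}]}

def lookup(cfg, key, default=None):
    # key looks like "dataset.<name>.<field>"
    _, name, field = key.split(".")
    for d in cfg["dataset"]:
        if d["name"] == name:
            return d.get(field, default)
    return default

print(lookup(cfg, "dataset.sample_1.batch_size", 32))  # 5
```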
diff --git a/models/rank/readme.md b/models/rank/readme.md
index bbcf32ea1fd60c372b0496bd893b89dcabdf5e8d..91e165b4edcedd72344bf1a57e134b0d037686e7 100755
--- a/models/rank/readme.md
+++ b/models/rank/readme.md
@@ -56,7 +56,18 @@
-## Tutorial
+## Tutorial (quick start)
+Get started quickly on the bundled sample data; see [Training](###训练) & [Prediction](###预测).
+## Tutorial (reproducing the papers)
+So that every model can be run end to end quickly, each model directory ships with sample data, and hyper-parameters such as batch_size have been tuned to keep the training & test logs readable on that sample data. To reproduce the results in this readme, adjust batch_size and the other hyper-parameters according to the table below, and use the provided scripts to download and preprocess the corresponding datasets.
+| Model | batch_size | thread_num | epoch_num |
+| :------------------: | :--------------------: | :--------------------: | :--------------------: |
+| DNN | 1000 | 10 | 1 |
+| DCN | 512 | 20 | 2 |
+| DeepFM | 100 | 10 | 30 |
+| DIN | 32 | 10 | 100 |
+| Wide&Deep | 40 | 1 | 40 |
+| xDeepFM | 100 | 1 | 10 |
 ### Data processing
 See the data download & preprocessing scripts under each model directory.
@@ -68,11 +79,21 @@
 sh run.sh
 ### Training
 ```
-python -m paddlerec.run -m paddlerec.models.rank.dnn # taking DNN as the example
+cd models/rank/dnn # enter the chosen ranking model's directory, taking DNN as the example
+python -m paddlerec.run -m paddlerec.models.rank.dnn # use the built-in config
+# To use a custom config, the workspace key in config.yaml must be the absolute path of that model directory.
+# After customizing the hyper-parameters, point the runner at your config file:
+python -m paddlerec.run -m ./config.yaml
 ```
 ### Prediction
 ```
-python -m paddlerec.run -m paddlerec.models.rank.dnn # taking DNN as the example
+# In the model's config.yaml, set mode to infer_runner.
+# Example: mode: runner1 -> mode: infer_runner
+# In infer_runner, set class to: class: single_infer
+# If the model inputs are identical for training and prediction, phase needs no change; the train phase can be reused.
+
+# After editing config.yaml, run:
+python -m paddlerec.run -m ./config.yaml # taking DNN as the example
 ```
 ## Benchmark results
@@ -87,6 +108,7 @@ python -m paddlerec.run -m paddlerec.models.rank.dnn # taking DNN as the example
 | Census-income Data | Wide&Deep | 0.76195 | 0.90577 | -- | -- |
 | Amazon Product | DIN | 0.47005 | 0.86379 | -- | -- |
+
 ## Distributed
 ### Model training throughput (samples/s)
 | Dataset | Model | Single node | Sync (4 nodes) | Sync (8 nodes) | Sync (16 nodes) | Sync (32 nodes) |
diff --git a/models/rank/wide_deep/config.yaml b/models/rank/wide_deep/config.yaml
index 9cadddf2b16989ef9d6844f6ac40dc53b06e4309..af9e106e24a6c9a6e985f671fabf0e60c4f8608f 100755
--- a/models/rank/wide_deep/config.yaml
+++ b/models/rank/wide_deep/config.yaml
@@ -12,37 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-train:
- trainer:
- # for cluster training
- strategy: "async"
+# global settings
+debug: false
+workspace: "paddlerec.models.rank.wide_deep"
- epochs: 10
- workspace: "paddlerec.models.rank.wide_deep"
- reader:
- batch_size: 2
- train_data_path: "{workspace}/data/sample_data/train"
+dataset:
+ - name: sample_1
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
 sparse_slots: "label"
 dense_slots: "wide_input:8 deep_input:58"
+ - name: infer_sample
+ type: QueueDataset
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
+ sparse_slots: "label"
+ dense_slots: "wide_input:8 deep_input:58"
+
+hyper_parameters:
+ optimizer:
+ class: SGD
+ learning_rate: 0.0001
+ hidden1_units: 75
+ hidden2_units: 50
+ hidden3_units: 25
+
+
+mode: train_runner
+# if infer, change mode to "infer_runner" and change phase to "infer_phase"
+
+runner:
+ - name: train_runner
+ trainer_class: single_train
+ epochs: 1
+ device: cpu
+ init_model_path: ""
+ save_checkpoint_interval: 1
+ save_inference_interval: 1
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ - name: infer_runner
+ trainer_class: single_infer
+ epochs: 1
+ device: cpu
+ init_model_path: "increment/0"
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- hidden1_units: 75
- hidden2_units: 50
- hidden3_units: 25
- learning_rate: 0.0001
- reg: 0.001
- act: "relu"
- optimizer: SGD
-
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: phase1
+ model: "{workspace}/model.py"
+ dataset_name: sample_1
+ thread_num: 1
+#- name: infer_phase
+# model: "{workspace}/model.py"
+# dataset_name: infer_sample
+# thread_num: 1
diff --git a/models/rank/wide_deep/model.py b/models/rank/wide_deep/model.py
index d798a54590d709c9f25f63638250bd12b0e62cbd..e9d4da603e5abf6b44ce86873795695c7cfe150b 100755
--- a/models/rank/wide_deep/model.py
+++ b/models/rank/wide_deep/model.py
@@ -24,6 +24,14 @@ class Model(ModelBase):
 def __init__(self, config):
 ModelBase.__init__(self, config)
+ def _init_hyper_parameters(self):
+ self.hidden1_units = envs.get_global_env(
+ "hyper_parameters.hidden1_units", 75)
+ self.hidden2_units = envs.get_global_env(
+ "hyper_parameters.hidden2_units", 50)
+ self.hidden3_units = envs.get_global_env(
+ "hyper_parameters.hidden3_units", 25)
+
 def wide_part(self, data):
 out = fluid.layers.fc(
 input=data,
@@ -56,21 +64,14 @@ class Model(ModelBase):
 return l3
- def train_net(self):
- self._init_slots()
+ def net(self, inputs, is_infer=False):
 wide_input = self._dense_data_var[0]
 deep_input = self._dense_data_var[1]
 label = self._sparse_data_var[0]
- hidden1_units = envs.get_global_env("hyper_parameters.hidden1_units",
- 75, self._namespace)
- hidden2_units = envs.get_global_env("hyper_parameters.hidden2_units",
- 50, self._namespace)
- hidden3_units = envs.get_global_env("hyper_parameters.hidden3_units",
- 25, self._namespace)
 wide_output = self.wide_part(wide_input)
- deep_output = self.deep_part(deep_input, hidden1_units, hidden2_units,
- hidden3_units)
+ deep_output = self.deep_part(deep_input, self.hidden1_units,
+ self.hidden2_units, self.hidden3_units)
 wide_model = fluid.layers.fc(
 input=wide_output,
@@ -109,18 +110,12 @@ class Model(ModelBase):
 self._metrics["AUC"] = auc_var
 self._metrics["BATCH_AUC"] = batch_auc
 self._metrics["ACC"] = acc
+ if is_infer:
+ self._infer_results["AUC"] = auc_var
+ self._infer_results["ACC"] = acc
 cost = fluid.layers.sigmoid_cross_entropy_with_logits(
 x=prediction,
 label=fluid.layers.cast(
 label, dtype='float32'))
 avg_cost = fluid.layers.mean(cost)
 self._cost = avg_cost
-
- def optimizer(self):
- learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
- None, self._namespace)
- optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
- return optimizer
-
- def infer_net(self):
- self.train_net()
diff --git a/models/rank/xdeepfm/config.yaml b/models/rank/xdeepfm/config.yaml
index 37b6b65b4777b7a2d497cfd1c0213c3e88fe6baa..6274d58559f6eaf54549a8cc82b00c2c50684032 100755
--- a/models/rank/xdeepfm/config.yaml
+++ b/models/rank/xdeepfm/config.yaml
@@ -11,41 +11,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+debug: false
+workspace: "paddlerec.models.rank.xdeepfm"
-train:
- trainer:
- # for cluster training
- strategy: "async"
-
- epochs: 10
- workspace: "paddlerec.models.rank.xdeepfm"
-
- reader:
- batch_size: 2
- train_data_path: "{workspace}/data/sample_data/train"
+dataset:
+ - name: sample_1
+ type: QueueDataset # or DataLoader
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
+ sparse_slots: "label feat_idx"
+ dense_slots: "feat_value:39"
+ - name: infer_sample
+ type: QueueDataset # or DataLoader
+ batch_size: 5
+ data_path: "{workspace}/data/sample_data/train"
 sparse_slots: "label feat_idx"
 dense_slots: "feat_value:39"
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- layer_sizes_dnn: [10, 10, 10]
- layer_sizes_cin: [10, 10]
- sparse_feature_number: 1086460
- sparse_feature_dim: 9
- num_field: 39
- fc_sizes: [400, 400, 400]
- learning_rate: 0.0001
- reg: 0.0001
- act: "relu"
- optimizer: SGD
+hyper_parameters:
+ optimizer:
+ class: SGD
+ learning_rate: 0.0001
+ layer_sizes_dnn: [10, 10, 10]
+ layer_sizes_cin: [10, 10]
+ sparse_feature_number: 1086460
+ sparse_feature_dim: 9
+ num_field: 39
+ fc_sizes: [400, 400, 400]
+ act: "relu"
+
+
+mode: train_runner
+# if infer, change mode to "infer_runner" and change phase to "infer_phase"
+
+runner:
+ - name: train_runner
+ trainer_class: single_train
+ epochs: 1
+ device: cpu
+ init_model_path: ""
+ save_checkpoint_interval: 1
+ save_inference_interval: 1
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ - name: infer_runner
+ trainer_class: single_infer
+ epochs: 1
+ device: cpu
+ init_model_path: "increment/0"
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: phase1
+ model: "{workspace}/model.py"
+ dataset_name: sample_1
+ thread_num: 1
+#- name: infer_phase
+# model: "{workspace}/model.py"
+# dataset_name: infer_sample
+# thread_num: 1
diff --git a/models/rank/xdeepfm/model.py b/models/rank/xdeepfm/model.py
index 23443c7d79e78690e6669716901238710599e3b7..4ca057bdcf9b858b7423b3fbaaa8e1e51d12ae86 100755
--- a/models/rank/xdeepfm/model.py
+++ b/models/rank/xdeepfm/model.py
@@ -22,38 +22,45 @@ class Model(ModelBase):
 def __init__(self, config):
 ModelBase.__init__(self, config)
- def xdeepfm_net(self):
+ def _init_hyper_parameters(self):
+ self.sparse_feature_number = envs.get_global_env(
+ "hyper_parameters.sparse_feature_number", None)
+ self.sparse_feature_dim = envs.get_global_env(
+ "hyper_parameters.sparse_feature_dim", None)
+ self.num_field = envs.get_global_env("hyper_parameters.num_field",
+ None)
+ self.layer_sizes_cin = envs.get_global_env(
+ "hyper_parameters.layer_sizes_cin", None)
+ self.layer_sizes_dnn = envs.get_global_env(
+ "hyper_parameters.layer_sizes_dnn", None)
+ self.act = envs.get_global_env("hyper_parameters.act", None)
+
+ def net(self, inputs, is_infer=False):
+ raw_feat_idx = self._sparse_data_var[1]
+ raw_feat_value = self._dense_data_var[0]
+ self.label = self._sparse_data_var[0]
+
 init_value_ = 0.1
 initer = fluid.initializer.TruncatedNormalInitializer(
 loc=0.0, scale=init_value_)
 is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
- sparse_feature_number = envs.get_global_env(
- "hyper_parameters.sparse_feature_number", None, self._namespace)
- sparse_feature_dim = envs.get_global_env(
- "hyper_parameters.sparse_feature_dim", None, self._namespace)
 # ------------------------- network input --------------------------
- num_field = envs.get_global_env("hyper_parameters.num_field", None,
- self._namespace)
- raw_feat_idx = self._sparse_data_var[1]
- raw_feat_value = self._dense_data_var[0]
- self.label = self._sparse_data_var[0]
-
 feat_idx = raw_feat_idx
 feat_value = fluid.layers.reshape(
- raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
+ raw_feat_value, [-1, self.num_field, 1]) # None * num_field * 1
 feat_embeddings = fluid.embedding(
 input=feat_idx,
 is_sparse=True,
 dtype='float32',
- size=[sparse_feature_number + 1, sparse_feature_dim],
+ size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
 padding_idx=0,
 param_attr=fluid.ParamAttr(initializer=initer))
 feat_embeddings = fluid.layers.reshape(feat_embeddings, [
- -1, num_field, sparse_feature_dim
+ -1, self.num_field, self.sparse_feature_dim
 ]) # None * num_field * embedding_size
 feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
@@ -63,11 +70,11 @@ class Model(ModelBase):
 input=feat_idx,
 is_sparse=True,
 dtype='float32',
- size=[sparse_feature_number + 1, 1],
+ size=[self.sparse_feature_number + 1, 1],
 padding_idx=0,
 param_attr=fluid.ParamAttr(initializer=initer))
 weights_linear = fluid.layers.reshape(
- weights_linear, [-1, num_field, 1]) # None * num_field * 1
+ weights_linear, [-1, self.num_field, 1]) # None * num_field * 1
 b_linear = fluid.layers.create_parameter(
 shape=[1],
 dtype='float32',
@@ -77,31 +84,30 @@ class Model(ModelBase):
 # -------------------- CIN --------------------
- layer_sizes_cin = envs.get_global_env(
- "hyper_parameters.layer_sizes_cin", None, self._namespace)
 Xs = [feat_embeddings]
- last_s = num_field
- for s in layer_sizes_cin:
+ last_s = self.num_field
+ for s in self.layer_sizes_cin:
 # calculate Z^(k+1) with X^k and X^0
 X_0 = fluid.layers.reshape(
 fluid.layers.transpose(Xs[0], [0, 2, 1]),
- [-1, sparse_feature_dim, num_field,
+ [-1, self.sparse_feature_dim, self.num_field,
 1]) # None, embedding_size, num_field, 1
 X_k = fluid.layers.reshape(
 fluid.layers.transpose(Xs[-1], [0, 2, 1]),
- [-1, sparse_feature_dim, 1,
+ [-1, self.sparse_feature_dim, 1,
 last_s]) # None, embedding_size, 1, last_s
 Z_k_1 = fluid.layers.matmul(
 X_0, X_k) # None, embedding_size, num_field, last_s
 # compresses Z^(k+1) to X^(k+1)
 Z_k_1 = fluid.layers.reshape(Z_k_1, [
- -1, sparse_feature_dim, last_s * num_field
+ -1, self.sparse_feature_dim, last_s * self.num_field
 ]) # None, embedding_size, last_s*num_field
 Z_k_1 = fluid.layers.transpose(
 Z_k_1, [0, 2, 1]) # None, s*num_field, embedding_size
 Z_k_1 = fluid.layers.reshape(
- Z_k_1, [-1, last_s * num_field, 1, sparse_feature_dim]
+ Z_k_1,
+ [-1, last_s * self.num_field, 1, self.sparse_feature_dim]
 ) # None, last_s*num_field, 1, embedding_size (None, channal_in, h, w)
 X_k_1 = fluid.layers.conv2d(
 Z_k_1,
@@ -112,7 +118,8 @@ class Model(ModelBase):
 param_attr=fluid.ParamAttr(
 initializer=initer)) # None, s, 1, embedding_size
 X_k_1 = fluid.layers.reshape(
- X_k_1, [-1, s, sparse_feature_dim]) # None, s, embedding_size
+ X_k_1,
+ [-1, s, self.sparse_feature_dim]) # None, s, embedding_size
 Xs.append(X_k_1)
 last_s = s
@@ -130,17 +137,13 @@ class Model(ModelBase):
 # -------------------- DNN --------------------
- layer_sizes_dnn = envs.get_global_env(
- "hyper_parameters.layer_sizes_dnn", None, self._namespace)
- act = envs.get_global_env("hyper_parameters.act", None,
- self._namespace)
- y_dnn = fluid.layers.reshape(feat_embeddings,
- [-1, num_field * sparse_feature_dim])
- for s in layer_sizes_dnn:
+ y_dnn = fluid.layers.reshape(
+ feat_embeddings, [-1, self.num_field * self.sparse_feature_dim])
+ for s in self.layer_sizes_dnn:
 y_dnn = fluid.layers.fc(
 input=y_dnn,
 size=s,
- act=act,
+ act=self.act,
 param_attr=fluid.ParamAttr(initializer=initer),
 bias_attr=None)
 y_dnn = fluid.layers.fc(input=y_dnn,
@@ -152,11 +155,6 @@ class Model(ModelBase):
 # ------------------- xDeepFM ------------------
 self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)
-
- def train_net(self):
- self._init_slots()
- self.xdeepfm_net()
-
 cost = fluid.layers.log_loss(
 input=self.predict,
 label=fluid.layers.cast(self.label, "float32"),
@@ -172,12 +170,5 @@ class Model(ModelBase):
 slide_steps=0)
 self._metrics["AUC"] = auc_var
 self._metrics["BATCH_AUC"] = batch_auc_var
-
- def optimizer(self):
- learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
- None, self._namespace)
- optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
- return optimizer
-
- def infer_net(self):
- self.train_net()
+ if is_infer:
+ self._infer_results["AUC"] = auc_var
diff --git a/models/recall/gru4rec/config.yaml b/models/recall/gru4rec/config.yaml
index 744515b4f453756545b7171f8c7285042c8afca5..90cc2d2debca27a0a5e5e7c2fba512c2796a1b14 100644
--- a/models/recall/gru4rec/config.yaml
+++ b/models/recall/gru4rec/config.yaml
@@ -12,47 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-evaluate:
- reader:
- batch_size: 1
- class: "{workspace}/rsc15_infer_reader.py"
- test_data_path: "{workspace}/data/train"
- is_return_numpy: False
+workspace: "paddlerec.models.recall.gru4rec"
+dataset:
+- name: dataset_train
+ batch_size: 5
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/rsc15_reader.py"
+- name: dataset_infer
+ batch_size: 5
+ type: QueueDataset
+ data_path: "{workspace}/data/test"
+ data_converter: "{workspace}/rsc15_reader.py"
-train:
- trainer:
- # for cluster training
- strategy: "async"
+hyper_parameters:
+ vocab_size: 1000
+ hid_size: 100
+ emb_lr_x: 10.0
+ gru_lr_x: 1.0
+ fc_lr_x: 1.0
+ init_low_bound: -0.04
+ init_high_bound: 0.04
+ optimizer:
+ class: adagrad
+ learning_rate: 0.01
+ strategy: async
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+ class: single_train
+ device: cpu
 epochs: 3
- workspace: "paddlerec.models.recall.gru4rec"
+ save_checkpoint_interval: 2
+ save_inference_interval: 4
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 10
+- name: infer_runner
+ class: single_infer
+ init_model_path: "increment/0"
 device: cpu
+ epochs: 3
- reader:
- batch_size: 5
- class: "{workspace}/rsc15_reader.py"
- train_data_path: "{workspace}/data/train"
-
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- vocab_size: 1000
- hid_size: 100
- emb_lr_x: 10.0
- gru_lr_x: 1.0
- fc_lr_x: 1.0
- init_low_bound: -0.04
- init_high_bound: 0.04
- learning_rate: 0.01
- optimizer: adagrad
-
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: train
+ model: "{workspace}/model.py"
+ dataset_name: dataset_train
+ thread_num: 1
+ #- name: infer
+ # model: "{workspace}/model.py"
+ # dataset_name: dataset_infer
+ # thread_num: 1
diff --git a/models/recall/gru4rec/model.py b/models/recall/gru4rec/model.py
index 6848f1e65d51c9d5e3f9890b3f3f148ef68829fc..571deadf7d97c1010a03590d5360337528b25685 100644
--- a/models/recall/gru4rec/model.py
+++ b/models/recall/gru4rec/model.py
@@ -22,84 +22,72 @@ class Model(ModelBase):
 def __init__(self, config):
 ModelBase.__init__(self, config)
- def all_vocab_network(self, is_infer=False):
- """ network definition """
- recall_k = envs.get_global_env("hyper_parameters.recall_k", None,
- self._namespace)
- vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
- self._namespace)
- hid_size = envs.get_global_env("hyper_parameters.hid_size", None,
- self._namespace)
- init_low_bound = envs.get_global_env("hyper_parameters.init_low_bound",
- None, self._namespace)
- init_high_bound = envs.get_global_env(
- "hyper_parameters.init_high_bound", None, self._namespace)
- emb_lr_x = envs.get_global_env("hyper_parameters.emb_lr_x", None,
- self._namespace)
- gru_lr_x = envs.get_global_env("hyper_parameters.gru_lr_x", None,
- self._namespace)
- fc_lr_x = envs.get_global_env("hyper_parameters.fc_lr_x", None,
- self._namespace)
+ def _init_hyper_parameters(self):
+ self.recall_k = envs.get_global_env("hyper_parameters.recall_k")
+ self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size")
+ self.hid_size = envs.get_global_env("hyper_parameters.hid_size")
+ self.init_low_bound = envs.get_global_env(
+ "hyper_parameters.init_low_bound")
+ self.init_high_bound = envs.get_global_env(
+ "hyper_parameters.init_high_bound")
+ self.emb_lr_x = envs.get_global_env("hyper_parameters.emb_lr_x")
+ self.gru_lr_x = envs.get_global_env("hyper_parameters.gru_lr_x")
+ self.fc_lr_x = envs.get_global_env("hyper_parameters.fc_lr_x")
+
+ def input_data(self, is_infer=False, **kwargs):
+ # Input data
 src_wordseq = fluid.data(
 name="src_wordseq", shape=[None, 1], dtype="int64", lod_level=1)
 dst_wordseq = fluid.data(
 name="dst_wordseq", shape=[None, 1], dtype="int64", lod_level=1)
- if is_infer:
- self._infer_data_var = [src_wordseq, dst_wordseq]
- self._infer_data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._infer_data_var,
- capacity=64,
- use_double_buffer=False,
- iterable=False)
+ return [src_wordseq, dst_wordseq]
+
+ def net(self, inputs, is_infer=False):
+ src_wordseq = inputs[0]
+ dst_wordseq = inputs[1]
 emb = fluid.embedding(
 input=src_wordseq,
- size=[vocab_size, hid_size],
+ size=[self.vocab_size, self.hid_size],
 param_attr=fluid.ParamAttr(
 name="emb",
 initializer=fluid.initializer.Uniform(
- low=init_low_bound, high=init_high_bound),
- learning_rate=emb_lr_x),
+ low=self.init_low_bound, high=self.init_high_bound),
+ learning_rate=self.emb_lr_x),
 is_sparse=True)
 fc0 = fluid.layers.fc(input=emb,
- size=hid_size * 3,
+ size=self.hid_size * 3,
 param_attr=fluid.ParamAttr(
 initializer=fluid.initializer.Uniform(
- low=init_low_bound,
- high=init_high_bound),
- learning_rate=gru_lr_x))
+ low=self.init_low_bound,
+ high=self.init_high_bound),
+ learning_rate=self.gru_lr_x))
 gru_h0 = fluid.layers.dynamic_gru(
 input=fc0,
- size=hid_size,
+ size=self.hid_size,
 param_attr=fluid.ParamAttr(
 initializer=fluid.initializer.Uniform(
- low=init_low_bound, high=init_high_bound),
- learning_rate=gru_lr_x))
+ low=self.init_low_bound, high=self.init_high_bound),
+ learning_rate=self.gru_lr_x))
 fc = fluid.layers.fc(input=gru_h0,
- size=vocab_size,
+ size=self.vocab_size,
 act='softmax',
 param_attr=fluid.ParamAttr(
 initializer=fluid.initializer.Uniform(
- low=init_low_bound, high=init_high_bound),
- learning_rate=fc_lr_x))
+ low=self.init_low_bound,
+ high=self.init_high_bound),
+ learning_rate=self.fc_lr_x))
 cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq)
- acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=recall_k)
+ acc = fluid.layers.accuracy(
+ input=fc, label=dst_wordseq, k=self.recall_k)
 if is_infer:
 self._infer_results['recall20'] = acc
 return
 avg_cost = fluid.layers.mean(x=cost)
- self._data_var.append(src_wordseq)
- self._data_var.append(dst_wordseq)
 self._cost = avg_cost
 self._metrics["cost"] = avg_cost
 self._metrics["acc"] = acc
-
- def train_net(self):
- self.all_vocab_network()
-
- def infer_net(self):
- self.all_vocab_network(is_infer=True)
diff --git a/models/recall/gru4rec/rsc15_infer_reader.py b/models/recall/gru4rec/rsc15_infer_reader.py
deleted file mode 100644
index b58532a471f4b70eedfebeeadb35df20b4c40e72..0000000000000000000000000000000000000000
--- a/models/recall/gru4rec/rsc15_infer_reader.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from paddlerec.core.reader import Reader
-
-
-class EvaluateReader(Reader):
- def init(self):
- pass
-
- def generate_sample(self, line):
- """
- Read the data line by line and process it as a dictionary
- """
-
- def reader():
- """
- This function needs to be implemented by the user, based on data format
- """
- l = line.strip().split()
- l = [w for w in l]
- src_seq = l[:len(l) - 1]
- src_seq = [int(e) for e in src_seq]
- trg_seq = l[1:]
- trg_seq = [int(e) for e in trg_seq]
- feature_name = ["src_wordseq", "dst_wordseq"]
- yield zip(feature_name, [src_seq] + [trg_seq])
-
- return reader
diff --git a/models/recall/ncf/config.yaml b/models/recall/ncf/config.yaml
index 249f6fccefa3b8ec11376a390433dd52c84682e7..16d298b12fd551bd8421b44bc12d536fdc962e8b 100644
--- a/models/recall/ncf/config.yaml
+++ b/models/recall/ncf/config.yaml
@@ -12,42 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-evaluate:
- reader:
- batch_size: 1
- class: "{workspace}/movielens_infer_reader.py"
- test_data_path: "{workspace}/data/test"
+workspace: "paddlerec.models.recall.ncf"
-train:
- trainer:
- # for cluster training
- strategy: "async"
+dataset:
+- name: dataset_train
+ batch_size: 5
+ type: QueueDataset
+ data_path: "{workspace}/data/train"
+ data_converter: "{workspace}/movielens_reader.py"
+- name: dataset_infer
+ batch_size: 5
+ type: QueueDataset
+ data_path: "{workspace}/data/test"
+ data_converter: "{workspace}/movielens_infer_reader.py"
- epochs: 3
- workspace: "paddlerec.models.recall.ncf"
- device: cpu
+hyper_parameters:
+ num_users: 6040
+ num_items: 3706
+ latent_dim: 8
+ fc_layers: [64, 32, 16, 8]
+ optimizer:
+ class: adam
+ learning_rate: 0.001
+ strategy: async
- reader:
- batch_size: 2
- class: "{workspace}/movielens_reader.py"
- train_data_path: "{workspace}/data/train"
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
- model:
- models: "{workspace}/model.py"
- hyper_parameters:
- num_users: 6040
- num_items: 3706
- latent_dim: 8
- layers: [64, 32, 16, 8]
- learning_rate: 0.001
- optimizer: adam
+runner:
+- name: train_runner
+ class: single_train
+ device: cpu
+ epochs: 3
+ save_checkpoint_interval: 2
+ save_inference_interval: 4
+ save_checkpoint_path: "increment"
+ save_inference_path: "inference"
+ print_interval: 10
+- name: infer_runner
+ class: single_infer
+ init_model_path: "increment/0"
+ device: cpu
+ epochs: 3
- save:
- increment:
- dirname: "increment"
- epoch_interval: 2
- save_last: True
- inference:
- dirname: "inference"
- epoch_interval: 4
- save_last: True
+phase:
+- name: train
+ model: "{workspace}/model.py"
+ dataset_name: dataset_train
+ thread_num: 1
+ #- name: infer
+ # model: "{workspace}/model.py"
+ # dataset_name: dataset_infer
+ # thread_num: 1
diff --git a/models/recall/ncf/model.py b/models/recall/ncf/model.py
index d2b7fa371be8f068e11e1dd37a63a90b55e96e65..bc8b71cd85af647e054dda38048da68703859c88 100644
--- a/models/recall/ncf/model.py
+++ b/models/recall/ncf/model.py
@@ -24,7 +24,13 @@ class Model(ModelBase):
 def __init__(self, config):
 ModelBase.__init__(self, config)
- def input_data(self, is_infer=False):
+ def _init_hyper_parameters(self):
+ self.num_users = envs.get_global_env("hyper_parameters.num_users")
+ self.num_items = envs.get_global_env("hyper_parameters.num_items")
+ self.latent_dim = envs.get_global_env("hyper_parameters.latent_dim")
+ self.layers = envs.get_global_env("hyper_parameters.fc_layers")
+
+ def input_data(self, is_infer=False, **kwargs):
 user_input = fluid.data(
 name="user_input", shape=[-1, 1], dtype="int64", lod_level=0)
 item_input = fluid.data(
@@ -35,45 +41,35 @@ class Model(ModelBase):
 inputs = [user_input] + [item_input]
 else:
 inputs = [user_input] + [item_input] + [label]
- self._data_var = inputs
 return inputs
 def net(self, inputs, is_infer=False):
- num_users = envs.get_global_env("hyper_parameters.num_users", None,
- self._namespace)
- num_items = envs.get_global_env("hyper_parameters.num_items", None,
- self._namespace)
- latent_dim = envs.get_global_env("hyper_parameters.latent_dim", None,
- self._namespace)
- layers = envs.get_global_env("hyper_parameters.layers", None,
- self._namespace)
-
- num_layer = len(layers) #Number of layers in the MLP
+ num_layer = len(self.layers) #Number of layers in the MLP
 MF_Embedding_User = fluid.embedding(
 input=inputs[0],
- size=[num_users, latent_dim],
+ size=[self.num_users, self.latent_dim],
 param_attr=fluid.initializer.Normal(
 loc=0.0, scale=0.01),
 is_sparse=True)
 MF_Embedding_Item = fluid.embedding(
 input=inputs[1],
- size=[num_items, latent_dim],
+ size=[self.num_items, self.latent_dim],
 param_attr=fluid.initializer.Normal(
 loc=0.0, scale=0.01),
 is_sparse=True)
 MLP_Embedding_User = fluid.embedding(
 input=inputs[0],
- size=[num_users, int(layers[0] / 2)],
+ size=[self.num_users, int(self.layers[0] / 2)],
 param_attr=fluid.initializer.Normal(
 loc=0.0, scale=0.01),
 is_sparse=True)
 MLP_Embedding_Item = fluid.embedding(
 input=inputs[1],
- size=[num_items, int(layers[0] / 2)],
+ size=[self.num_items, int(self.layers[0] / 2)],
 param_attr=fluid.initializer.Normal(
 loc=0.0, scale=0.01),
 is_sparse=True)
@@ -94,7 +90,7 @@ class Model(ModelBase):
 for i in range(1, num_layer):
 mlp_vector = fluid.layers.fc(
 input=mlp_vector,
- size=layers[i],
+ size=self.layers[i],
 act='relu',
 param_attr=fluid.ParamAttr(
 initializer=fluid.initializer.TruncatedNormal(
@@ -126,16 +122,3 @@ class Model(ModelBase):
 self._cost = avg_cost
 self._metrics["cost"] = avg_cost
-
- def train_net(self):
- input_data = self.input_data()
- self.net(input_data)
-
- def infer_net(self):
- self._infer_data_var = self.input_data(is_infer=True)
- self._infer_data_loader = fluid.io.DataLoader.from_generator(
- feed_list=self._infer_data_var,
- capacity=64,
- use_double_buffer=False,
- iterable=False)
- self.net(self._infer_data_var, is_infer=True)
diff --git a/models/recall/ncf/movielens_infer_reader.py b/models/recall/ncf/movielens_infer_reader.py
index dc737aed2b8f93a5d4274938cf468e8d9240be04..148c8008eb058ee3a126b1ec3253f2893d2e7150 100644
--- a/models/recall/ncf/movielens_infer_reader.py
+++ b/models/recall/ncf/movielens_infer_reader.py
@@ -19,7 +19,7 @@ from collections import defaultdict
 import numpy as np
-class EvaluateReader(Reader):
+class TrainReader(Reader):
 def init(self):
 pass
diff --git a/models/recall/ssr/config.yaml b/models/recall/ssr/config.yaml
index b7879466969605928922d07e6f624ff31566c898..7dcecde84d6119501dea9c84047b705e2a9ba410 100644
--- a/models/recall/ssr/config.yaml
+++ b/models/recall/ssr/config.yaml
@@ -12,43 +12,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+workspace: "paddlerec.models.recall.ssr" -evaluate: - reader: - batch_size: 1 - class: "{workspace}/ssr_infer_reader.py" - test_data_path: "{workspace}/data/train" - is_return_numpy: True +dataset: +- name: dataset_train + batch_size: 5 + type: QueueDataset + data_path: "{workspace}/data/train" + data_converter: "{workspace}/ssr_reader.py" +- name: dataset_infer + batch_size: 5 + type: QueueDataset + data_path: "{workspace}/data/test" + data_converter: "{workspace}/ssr_infer_reader.py" -train: - trainer: - # for cluster training - strategy: "async" +hyper_parameters: + vocab_size: 1000 + emb_dim: 128 + hidden_size: 100 + optimizer: + class: adagrad + learning_rate: 0.01 + strategy: async +#use infer_runner mode and modify 'phase' below if infer +mode: train_runner +#mode: infer_runner + +runner: +- name: train_runner + class: single_train + device: cpu epochs: 3 - workspace: "paddlerec.models.recall.ssr" + save_checkpoint_interval: 2 + save_inference_interval: 4 + save_checkpoint_path: "increment" + save_inference_path: "inference" + print_interval: 10 +- name: infer_runner + class: single_infer + init_model_path: "increment/0" device: cpu + epochs: 3 - reader: - batch_size: 5 - class: "{workspace}/ssr_reader.py" - train_data_path: "{workspace}/data/train" - - model: - models: "{workspace}/model.py" - hyper_parameters: - vocab_size: 1000 - emb_dim: 128 - hidden_size: 100 - learning_rate: 0.01 - optimizer: adagrad - - save: - increment: - dirname: "increment" - epoch_interval: 2 - save_last: True - inference: - dirname: "inference" - epoch_interval: 4 - save_last: True +phase: +- name: train + model: "{workspace}/model.py" + dataset_name: dataset_train + thread_num: 1 + #- name: infer + # model: "{workspace}/model.py" + # dataset_name: dataset_infer + # thread_num: 1 diff --git a/models/recall/ssr/model.py b/models/recall/ssr/model.py index 3abe3ae41514d97d46d86b52680076cf5932386c..b97a5927f736e97c763fec177882f40097650011 100644 --- a/models/recall/ssr/model.py +++ b/models/recall/ssr/model.py @@ -20,85 +20,45 @@ from paddlerec.core.utils import envs from paddlerec.core.model import Model as ModelBase -class BowEncoder(object): - """ bow-encoder """ - - def __init__(self): - self.param_name = "" - - def forward(self, emb): - return fluid.layers.sequence_pool(input=emb, pool_type='sum') - - -class GrnnEncoder(object): - """ grnn-encoder """ - - def __init__(self, param_name="grnn", hidden_size=128): - self.param_name = param_name - self.hidden_size = hidden_size - - def forward(self, emb): - fc0 = fluid.layers.fc(input=emb, - size=self.hidden_size * 3, - param_attr=self.param_name + "_fc.w", - bias_attr=False) - - gru_h = fluid.layers.dynamic_gru( - input=fc0, - size=self.hidden_size, - is_reverse=False, - param_attr=self.param_name + ".param", - bias_attr=self.param_name + ".bias") - return fluid.layers.sequence_pool(input=gru_h, pool_type='max') - - -class PairwiseHingeLoss(object): - def __init__(self, margin=0.8): - self.margin = margin - - def forward(self, pos, neg): - loss_part1 = fluid.layers.elementwise_sub( - tensor.fill_constant_batch_size_like( - input=pos, shape=[-1, 1], value=self.margin, dtype='float32'), - pos) - loss_part2 = fluid.layers.elementwise_add(loss_part1, neg) - loss_part3 = fluid.layers.elementwise_max( - tensor.fill_constant_batch_size_like( - input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'), - loss_part2) - return loss_part3 - - class Model(ModelBase): def __init__(self, config): ModelBase.__init__(self, config) - def get_correct(self, x, y): - less 
= tensor.cast(cf.less_than(x, y), dtype='float32') - correct = fluid.layers.reduce_sum(less) - return correct - - def train(self): - vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, - self._namespace) - emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, - self._namespace) - hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, - self._namespace) - emb_shape = [vocab_size, emb_dim] - + def _init_hyper_parameters(self): + self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size") + self.emb_dim = envs.get_global_env("hyper_parameters.emb_dim") + self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size") + + def input_data(self, is_infer=False, **kwargs): + if is_infer: + user_data = fluid.data( + name="user", shape=[None, 1], dtype="int64", lod_level=1) + all_item_data = fluid.data( + name="all_item", shape=[None, self.vocab_size], dtype="int64") + pos_label = fluid.data( + name="pos_label", shape=[None, 1], dtype="int64") + return [user_data, all_item_data, pos_label] + else: + user_data = fluid.data( + name="user", shape=[None, 1], dtype="int64", lod_level=1) + pos_item_data = fluid.data( + name="p_item", shape=[None, 1], dtype="int64", lod_level=1) + neg_item_data = fluid.data( + name="n_item", shape=[None, 1], dtype="int64", lod_level=1) + return [user_data, pos_item_data, neg_item_data] + + def net(self, inputs, is_infer=False): + if is_infer: + self._infer_net(inputs) + return + user_data = inputs[0] + pos_item_data = inputs[1] + neg_item_data = inputs[2] + emb_shape = [self.vocab_size, self.emb_dim] self.user_encoder = GrnnEncoder() self.item_encoder = BowEncoder() self.pairwise_hinge_loss = PairwiseHingeLoss() - user_data = fluid.data( - name="user", shape=[None, 1], dtype="int64", lod_level=1) - pos_item_data = fluid.data( - name="p_item", shape=[None, 1], dtype="int64", lod_level=1) - neg_item_data = fluid.data( - name="n_item", shape=[None, 1], dtype="int64", lod_level=1) - self._data_var.extend([user_data, pos_item_data, neg_item_data]) - user_emb = fluid.embedding( input=user_data, size=emb_shape, param_attr="emb.item") pos_item_emb = fluid.embedding( @@ -109,79 +69,115 @@ class Model(ModelBase): pos_item_enc = self.item_encoder.forward(pos_item_emb) neg_item_enc = self.item_encoder.forward(neg_item_emb) user_hid = fluid.layers.fc(input=user_enc, - size=hidden_size, + size=self.hidden_size, param_attr='user.w', bias_attr="user.b") pos_item_hid = fluid.layers.fc(input=pos_item_enc, - size=hidden_size, + size=self.hidden_size, param_attr='item.w', bias_attr="item.b") neg_item_hid = fluid.layers.fc(input=neg_item_enc, - size=hidden_size, + size=self.hidden_size, param_attr='item.w', bias_attr="item.b") cos_pos = fluid.layers.cos_sim(user_hid, pos_item_hid) cos_neg = fluid.layers.cos_sim(user_hid, neg_item_hid) hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg) avg_cost = fluid.layers.mean(hinge_loss) - correct = self.get_correct(cos_neg, cos_pos) + correct = self._get_correct(cos_neg, cos_pos) self._cost = avg_cost self._metrics["correct"] = correct self._metrics["hinge_loss"] = hinge_loss - def train_net(self): - self.train() - - def infer(self): - vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, - self._namespace) - emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, - self._namespace) - hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, - self._namespace) - - user_data = fluid.data( - name="user", shape=[None, 1], dtype="int64", 
+    def _infer_net(self, inputs):
+        user_data = inputs[0]
+        all_item_data = inputs[1]
+        pos_label = inputs[2]
 
         user_emb = fluid.embedding(
-            input=user_data, size=[vocab_size, emb_dim], param_attr="emb.item")
+            input=user_data,
+            size=[self.vocab_size, self.emb_dim],
+            param_attr="emb.item")
         all_item_emb = fluid.embedding(
             input=all_item_data,
-            size=[vocab_size, emb_dim],
+            size=[self.vocab_size, self.emb_dim],
             param_attr="emb.item")
         all_item_emb_re = fluid.layers.reshape(
-            x=all_item_emb, shape=[-1, emb_dim])
+            x=all_item_emb, shape=[-1, self.emb_dim])
 
         user_encoder = GrnnEncoder()
         user_enc = user_encoder.forward(user_emb)
         user_hid = fluid.layers.fc(input=user_enc,
-                                   size=hidden_size,
+                                   size=self.hidden_size,
                                    param_attr='user.w',
                                    bias_attr="user.b")
         user_exp = fluid.layers.expand(
-            x=user_hid, expand_times=[1, vocab_size])
-        user_re = fluid.layers.reshape(x=user_exp, shape=[-1, hidden_size])
+            x=user_hid, expand_times=[1, self.vocab_size])
+        user_re = fluid.layers.reshape(
+            x=user_exp, shape=[-1, self.hidden_size])
 
         all_item_hid = fluid.layers.fc(input=all_item_emb_re,
-                                       size=hidden_size,
+                                       size=self.hidden_size,
                                        param_attr='item.w',
                                        bias_attr="item.b")
         cos_item = fluid.layers.cos_sim(X=all_item_hid, Y=user_re)
-        all_pre_ = fluid.layers.reshape(x=cos_item, shape=[-1, vocab_size])
+        all_pre_ = fluid.layers.reshape(
+            x=cos_item, shape=[-1, self.vocab_size])
         acc = fluid.layers.accuracy(input=all_pre_, label=pos_label, k=20)
         self._infer_results['recall20'] = acc
 
-    def infer_net(self):
-        self.infer()
+    def _get_correct(self, x, y):
+        less = tensor.cast(cf.less_than(x, y), dtype='float32')
+        correct = fluid.layers.reduce_sum(less)
+        return correct
+
+
+class BowEncoder(object):
+    """ bow-encoder """
+
+    def __init__(self):
+        self.param_name = ""
+
+    def forward(self, emb):
+        return fluid.layers.sequence_pool(input=emb, pool_type='sum')
+
+
+class GrnnEncoder(object):
+    """ grnn-encoder """
+
+    def __init__(self, param_name="grnn", hidden_size=128):
+        self.param_name = param_name
+        self.hidden_size = hidden_size
+
+    def forward(self, emb):
+        fc0 = fluid.layers.fc(input=emb,
+                              size=self.hidden_size * 3,
+                              param_attr=self.param_name + "_fc.w",
+                              bias_attr=False)
+
+        gru_h = fluid.layers.dynamic_gru(
+            input=fc0,
+            size=self.hidden_size,
+            is_reverse=False,
+            param_attr=self.param_name + ".param",
+            bias_attr=self.param_name + ".bias")
+        return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
+
+
+class PairwiseHingeLoss(object):
+    def __init__(self, margin=0.8):
+        self.margin = margin
+
+    def forward(self, pos, neg):
+        loss_part1 = fluid.layers.elementwise_sub(
+            tensor.fill_constant_batch_size_like(
+                input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
+            pos)
+        loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
+        loss_part3 = fluid.layers.elementwise_max(
+            tensor.fill_constant_batch_size_like(
+                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+            loss_part2)
+        return loss_part3
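For readers tracing the three elementwise ops in `PairwiseHingeLoss.forward`, they reduce to `max(0, margin - pos + neg)` per row. A minimal NumPy sketch of the same arithmetic (illustrative only, not part of the patch):

```python
import numpy as np

def pairwise_hinge(pos, neg, margin=0.8):
    # Mirrors the fluid ops above: (margin - pos) + neg, clipped at zero.
    return np.maximum(0.0, margin - pos + neg)

# A well-separated pair incurs no loss; a violating pair is penalized.
pos = np.array([[0.9], [0.2]], dtype=np.float32)  # cos_sim(user, positive item)
neg = np.array([[0.1], [0.4]], dtype=np.float32)  # cos_sim(user, negative item)
print(pairwise_hinge(pos, neg))  # [[0.] [1.]]
```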
diff --git a/models/recall/youtube_dnn/config.yaml b/models/recall/youtube_dnn/config.yaml
index 6cffbaba0abe7b42dfb653b1876f71936827a7bc..5bbc41a9e850044101fa844fca256db358dc1754 100644
--- a/models/recall/youtube_dnn/config.yaml
+++ b/models/recall/youtube_dnn/config.yaml
@@ -13,37 +13,42 @@
 # limitations under the License.
 
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
+workspace: "paddlerec.models.recall.youtube_dnn"
 
-  epochs: 3
-  workspace: "paddlerec.models.recall.youtube_dnn"
-  device: cpu
+dataset:
+- name: dataset_train
+  batch_size: 5
+  type: DataLoader
+  #type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/random_reader.py"
+
+hyper_parameters:
+  watch_vec_size: 64
+  search_vec_size: 64
+  other_feat_size: 64
+  output_size: 100
+  layers: [128, 64, 32]
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
 
-  reader:
-    batch_size: 2
-    class: "{workspace}/random_reader.py"
-    train_data_path: "{workspace}/data/train"
+mode: train_runner
 
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      watch_vec_size: 64
-      search_vec_size: 64
-      other_feat_size: 64
-      output_size: 100
-      layers: [128, 64, 32]
-      learning_rate: 0.01
-      optimizer: sgd
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
 
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
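The migrated configs express the optimizer as a structured block; note in passing that youtube_dnn switches from sgd at 0.01 to adam at 0.001. A hedged sketch of how such a block could be mapped onto fluid optimizers (an assumed helper for illustration, not PaddleRec's actual loader):

```python
import paddle.fluid as fluid

def build_optimizer(conf):
    # conf is the parsed `hyper_parameters.optimizer` block, e.g.
    # {"class": "adam", "learning_rate": 0.001, "strategy": "async"}
    cls = conf["class"].lower()
    lr = conf["learning_rate"]
    if cls == "adam":
        return fluid.optimizer.Adam(learning_rate=lr)
    if cls == "adagrad":
        return fluid.optimizer.Adagrad(learning_rate=lr)
    if cls == "sgd":
        return fluid.optimizer.SGD(learning_rate=lr)
    raise ValueError("unsupported optimizer class: %s" % cls)
```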
diff --git a/models/recall/youtube_dnn/model.py b/models/recall/youtube_dnn/model.py
index 22953764d1f81218b2f3d4c232392fe741043fa3..a1203447c6a66404f270a8f65215eea5cd9e82c7 100644
--- a/models/recall/youtube_dnn/model.py
+++ b/models/recall/youtube_dnn/model.py
@@ -13,39 +13,64 @@
 # limitations under the License.
 
 import math
+import numpy as np
 import paddle.fluid as fluid
 
 from paddlerec.core.utils import envs
 from paddlerec.core.model import Model as ModelBase
-import numpy as np
 
 
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)
 
-    def input_data(self, is_infer=False):
+    def _init_hyper_parameters(self):
+        self.watch_vec_size = envs.get_global_env(
+            "hyper_parameters.watch_vec_size")
+        self.search_vec_size = envs.get_global_env(
+            "hyper_parameters.search_vec_size")
+        self.other_feat_size = envs.get_global_env(
+            "hyper_parameters.other_feat_size")
+        self.output_size = envs.get_global_env("hyper_parameters.output_size")
+        self.layers = envs.get_global_env("hyper_parameters.layers")
 
-        watch_vec_size = envs.get_global_env("hyper_parameters.watch_vec_size",
-                                             None, self._namespace)
-        search_vec_size = envs.get_global_env(
-            "hyper_parameters.search_vec_size", None, self._namespace)
-        other_feat_size = envs.get_global_env(
-            "hyper_parameters.other_feat_size", None, self._namespace)
+    def input_data(self, is_infer=False, **kwargs):
         watch_vec = fluid.data(
-            name="watch_vec", shape=[None, watch_vec_size], dtype="float32")
+            name="watch_vec",
+            shape=[None, self.watch_vec_size],
+            dtype="float32")
         search_vec = fluid.data(
-            name="search_vec", shape=[None, search_vec_size], dtype="float32")
+            name="search_vec",
+            shape=[None, self.search_vec_size],
+            dtype="float32")
         other_feat = fluid.data(
-            name="other_feat", shape=[None, other_feat_size], dtype="float32")
+            name="other_feat",
+            shape=[None, self.other_feat_size],
+            dtype="float32")
         label = fluid.data(name="label", shape=[None, 1], dtype="int64")
         inputs = [watch_vec] + [search_vec] + [other_feat] + [label]
-        self._data_var = inputs
 
         return inputs
 
-    def fc(self, tag, data, out_dim, active='relu'):
+    def net(self, inputs, is_infer=False):
+        concat_feats = fluid.layers.concat(input=inputs[:-1], axis=-1)
+
+        l1 = self._fc('l1', concat_feats, self.layers[0], 'relu')
+        l2 = self._fc('l2', l1, self.layers[1], 'relu')
+        l3 = self._fc('l3', l2, self.layers[2], 'relu')
+        l4 = self._fc('l4', l3, self.output_size, 'softmax')
+
+        num_seqs = fluid.layers.create_tensor(dtype='int64')
+        acc = fluid.layers.accuracy(input=l4, label=inputs[-1], total=num_seqs)
+
+        cost = fluid.layers.cross_entropy(input=l4, label=inputs[-1])
+        avg_cost = fluid.layers.mean(cost)
+
+        self._cost = avg_cost
+        self._metrics["acc"] = acc
+
+    def _fc(self, tag, data, out_dim, active='relu'):
         init_stddev = 1.0
         scales = 1.0 / np.sqrt(data.shape[1])
 
@@ -67,31 +92,3 @@ class Model(ModelBase):
             bias_attr=b_attr,
             name=tag)
 
         return out
-
-    def net(self, inputs):
-        output_size = envs.get_global_env("hyper_parameters.output_size", None,
-                                          self._namespace)
-        layers = envs.get_global_env("hyper_parameters.layers", None,
-                                     self._namespace)
-        concat_feats = fluid.layers.concat(input=inputs[:-1], axis=-1)
-
-        l1 = self.fc('l1', concat_feats, layers[0], 'relu')
-        l2 = self.fc('l2', l1, layers[1], 'relu')
-        l3 = self.fc('l3', l2, layers[2], 'relu')
-        l4 = self.fc('l4', l3, output_size, 'softmax')
-
-        num_seqs = fluid.layers.create_tensor(dtype='int64')
-        acc = fluid.layers.accuracy(input=l4, label=inputs[-1], total=num_seqs)
-
-        cost = fluid.layers.cross_entropy(input=l4, label=inputs[-1])
-        avg_cost = fluid.layers.mean(cost)
-
-        self._cost = avg_cost
-        self._metrics["acc"] = acc
-
-    def train_net(self):
-        input_data = self.input_data()
-        self.net(input_data)
-
-    def infer_net(self):
-        pass
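`_fc` scales its initializer by the fan-in of the incoming tensor (`scales = 1.0 / np.sqrt(data.shape[1])`). The hunk only shows the bound being computed, not which distribution it feeds, so the uniform draw below is an assumption; the point is just the fan-in bound:

```python
import numpy as np

# Fan-in of the first FC: the concat of watch_vec, search_vec, other_feat (64 each).
fan_in = 64 + 64 + 64
scales = 1.0 / np.sqrt(fan_in)  # same expression as `scales` in _fc above

w = np.random.uniform(-scales, scales, size=(fan_in, 128))
assert np.abs(w).max() <= scales  # every weight respects the fan-in bound
```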
diff --git a/models/recall/youtube_dnn/random_reader.py b/models/recall/youtube_dnn/random_reader.py
index 30df6d1d29cfdf75c7e7cf9b68643af582c9f49f..cdb0add6dbb358dba52ba9c933c060fec3ddf516 100644
--- a/models/recall/youtube_dnn/random_reader.py
+++ b/models/recall/youtube_dnn/random_reader.py
@@ -13,22 +13,22 @@
 # limitations under the License.
 
 from __future__ import print_function
+import numpy as np
+
 from paddlerec.core.reader import Reader
 from paddlerec.core.utils import envs
 from collections import defaultdict
-import numpy as np
 
 
 class TrainReader(Reader):
     def init(self):
         self.watch_vec_size = envs.get_global_env(
-            "hyper_parameters.watch_vec_size", None, "train.model")
+            "hyper_parameters.watch_vec_size")
         self.search_vec_size = envs.get_global_env(
-            "hyper_parameters.search_vec_size", None, "train.model")
+            "hyper_parameters.search_vec_size")
         self.other_feat_size = envs.get_global_env(
-            "hyper_parameters.other_feat_size", None, "train.model")
-        self.output_size = envs.get_global_env("hyper_parameters.output_size",
-                                               None, "train.model")
+            "hyper_parameters.other_feat_size")
+        self.output_size = envs.get_global_env("hyper_parameters.output_size")
 
     def generate_sample(self, line):
         """
diff --git a/models/rerank/listwise/config.yaml b/models/rerank/listwise/config.yaml
index 18b018026634e461257d167fa543f2d81a25436c..2ddfa32fe08aa8bece00727aefc46bb893b4d090 100644
--- a/models/rerank/listwise/config.yaml
+++ b/models/rerank/listwise/config.yaml
@@ -12,44 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/random_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
+workspace: "paddlerec.models.rerank.listwise"
 
-  epochs: 3
-  workspace: "paddlerec.models.rerank.listwise"
-  device: cpu
+dataset:
+- name: dataset_train
+  type: DataLoader
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/random_reader.py"
+- name: dataset_infer
+  type: DataLoader
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/random_reader.py"
 
-  reader:
-    batch_size: 2
-    class: "{workspace}/random_reader.py"
-    train_data_path: "{workspace}/data/train"
-    dataset_class: "DataLoader"
+hyper_parameters:
+  hidden_size: 128
+  user_vocab: 200
+  item_vocab: 1000
+  item_len: 5
+  embed_size: 16
+  batch_size: 1
+  optimizer:
+    class: sgd
+    learning_rate: 0.01
+    strategy: async
 
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      hidden_size: 128
-      user_vocab: 200
-      item_vocab: 1000
-      item_len: 5
-      embed_size: 16
-      learning_rate: 0.01
-      optimizer: sgd
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
 
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+  #- name: infer
+  #  model: "{workspace}/model.py"
+  #  dataset_name: dataset_infer
+  #  thread_num: 1
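The reader migrations above (and the listwise ones below) drop the old third "namespace" argument: with everything under one workspace, `envs` can resolve flat dotted keys directly. A toy illustration of the new lookup style (an assumed stand-in for `paddlerec.core.utils.envs`, not its real implementation):

```python
# Flat key/value view of the parsed YAML; values taken from the listwise config.
_GLOBAL_ENVS = {
    "hyper_parameters.user_vocab": 200,
    "hyper_parameters.item_vocab": 1000,
    "hyper_parameters.item_len": 5,
    "hyper_parameters.batch_size": 1,
}

def get_global_env(key, default=None):
    # Old API: get_global_env(key, default, namespace). New configs flatten
    # everything into dotted keys under a single namespace, so one key suffices.
    return _GLOBAL_ENVS.get(key, default)

assert get_global_env("hyper_parameters.item_len") == 5
```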
diff --git a/models/rerank/listwise/model.py b/models/rerank/listwise/model.py
index d4cf9d8ed1a669d6d1ff3339008605f1aa26f4cd..d588db0629439eec9396ec9b1f81f1988e99d51e 100644
--- a/models/rerank/listwise/model.py
+++ b/models/rerank/listwise/model.py
@@ -25,18 +25,13 @@ class Model(ModelBase):
         ModelBase.__init__(self, config)
 
     def _init_hyper_parameters(self):
-        self.item_len = envs.get_global_env("hyper_parameters.self.item_len",
-                                            None, self._namespace)
-        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size",
-                                               None, self._namespace)
-        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab",
-                                              None, self._namespace)
-        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab",
-                                              None, self._namespace)
-        self.embed_size = envs.get_global_env("hyper_parameters.embed_size",
-                                              None, self._namespace)
-
-    def input_data(self, is_infer=False):
+        self.item_len = envs.get_global_env("hyper_parameters.item_len")
+        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size")
+        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab")
+        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab")
+        self.embed_size = envs.get_global_env("hyper_parameters.embed_size")
+
+    def input_data(self, is_infer=False, **kwargs):
         user_slot_names = fluid.data(
             name='user_slot_names',
             shape=[None, 1],
diff --git a/models/rerank/listwise/random_reader.py b/models/rerank/listwise/random_reader.py
index 41cf14b79285efe8f2d80e01bba74da3501cc504..aa7af3f083c720d35e9f11f5f5ec1bddd107cabc 100644
--- a/models/rerank/listwise/random_reader.py
+++ b/models/rerank/listwise/random_reader.py
@@ -23,14 +23,10 @@ from collections import defaultdict
 
 class TrainReader(Reader):
     def init(self):
-        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab",
-                                              None, "train.model")
-        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab",
-                                              None, "train.model")
-        self.item_len = envs.get_global_env("hyper_parameters.item_len", None,
-                                            "train.model")
-        self.batch_size = envs.get_global_env("batch_size", None,
-                                              "train.reader")
+        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab")
+        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab")
+        self.item_len = envs.get_global_env("hyper_parameters.item_len")
+        self.batch_size = envs.get_global_env("hyper_parameters.batch_size")
 
     def reader_creator(self):
         def reader():
diff --git a/models/rerank/readme.md b/models/rerank/readme.md
index e7552c377dd03ab93af5c233ef8be31edc529de4..6f698daf9f9a7529abcb8d18010965988838a940 100755
--- a/models/rerank/readme.md
+++ b/models/rerank/readme.md
@@ -9,9 +9,6 @@
 * [整体介绍](#整体介绍)
   * [重排序模型列表](#重排序模型列表)
 * [使用教程](#使用教程)
-  * [训练 预测](#训练 预测)
-* [效果对比](#效果对比)
-  * [模型效果列表](#模型效果列表)
 
 ## 整体介绍
 ### 融合模型列表
@@ -29,15 +26,11 @@
 
-## 使用教程
-### 训练 预测
+## 使用教程(快速开始)
 ```shell
 python -m paddlerec.run -m paddlerec.models.rerank.listwise # listwise
 ```
 
-## 效果对比
-### 模型效果列表
+## 使用教程(复现论文)
 
-| 数据集 | 模型 | loss | auc |
-| :------------------: | :--------------------: | :---------: |:---------: |
-| -- | Listwise | -- | -- |
+listwise原论文没有给出训练数据,我们使用了随机的数据,可参考快速开始
diff --git a/setup.py b/setup.py
index aaa34a127a0570714023211266aadc1ddfaa0d47..8ad1cc742434aa39513a1c618b56649c3530686a 100644
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,8 @@ def build(dirname):
     models_copy = [
         'data/*.txt', 'data/*/*.txt', '*.yaml', '*.sh', 'tree/*.npy',
-        'tree/*.txt', 'data/sample_data/*', 'data/sample_data/train/*', 'data/*/*.csv'
+        'tree/*.txt', 'data/sample_data/*', 'data/sample_data/train/*',
+        'data/sample_data/infer/*', 'data/*/*.csv'
     ]
     engine_copy = ['*/*.sh']
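For context on where `models_copy` lands: glob lists like this are typically handed to setuptools as `package_data` so that sample data and configs ship inside the wheel. The surrounding `build()` helper is not shown in the hunk, so the wiring below is an assumption, sketched for illustration only:

```python
from setuptools import setup, find_packages

# Same globs as the patched setup.py; the new entry ships the infer samples too.
models_copy = [
    'data/*.txt', 'data/*/*.txt', '*.yaml', '*.sh', 'tree/*.npy',
    'tree/*.txt', 'data/sample_data/*', 'data/sample_data/train/*',
    'data/sample_data/infer/*', 'data/*/*.csv'
]

setup(
    name='example-rec-package',      # hypothetical name, for illustration
    version='0.0.1',
    packages=find_packages(),
    package_data={'': models_copy},  # copy matching files into every package
)
```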