Merge branch 'develop' into 'develop'

Add MMoE and share-bottom inference. See merge request !37

Merge branch 'develop' into 'develop'
Add MMoE and share-bottom inference. See merge request !37
ad5022d2 · zhangwenhui03 · 5e300b8c · 52d3c0a5 · ad5022d2 · ad5022d2
6 changed files
--- a/models/multitask/mmoe/census_infer_reader.py
+++ b/models/multitask/mmoe/census_infer_reader.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs
+import numpy as np
+
+
+class EvaluateReader(Reader):
+    """
+    Reader that turns one comma-separated text line into the three named
+    slots "input", "label_income" and "label_marital".
+    """
+
+    def init(self):
+        """
+        No setup is performed.
+        """
+        pass
+
+    def generate_sample(self, line):
+        """
+        Build and return a generator function that parses ``line``.
+
+        ``line`` is not parsed here; parsing happens when the returned
+        ``reader`` callable is invoked and iterated.
+        """
+
+        def reader():
+            """
+            Parse ``line`` and yield a single sample.
+
+            Every comma-separated field is converted to float. Column 0 is the
+            marital label, column 1 is the income label, and the remaining
+            columns are the input features. The sample is yielded as
+            ``zip`` of the slot names with their values.
+            """
+            l = line.strip().split(',')
+            l = list(map(float, l))
+            # Labels start empty; a label value other than 0 or 1 leaves the
+            # corresponding list empty because neither branch below matches.
+            label_income = []
+            label_marital = []
+            # Everything after the two label columns is the feature vector.
+            data = l[2:]
+            # Expand each binary label into a two-element one-hot list:
+            # 0 -> [1, 0], 1 -> [0, 1].
+            if int(l[1]) == 0:
+                label_income = [1, 0]
+            elif int(l[1]) == 1:
+                label_income = [0, 1]
+            if int(l[0]) == 0:
+                label_marital = [1, 0]
+            elif int(l[0]) == 1:
+                label_marital = [0, 1]
+            # Slot names are paired positionally with the values below.
+            feature_name = ["input", "label_income", "label_marital"]
+            yield zip(feature_name, [data] + [label_income] + [label_marital])
+
+        return reader
--- a/models/multitask/mmoe/config.yaml
+++ b/models/multitask/mmoe/config.yaml
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+evaluate:
+  reader:
+    batch_size: 1
+    class: "{workspace}/census_infer_reader.py"
+    test_data_path: "{workspace}/data/train"
+
 train:
  trainer:
    # for cluster training
@@ -22,7 +28,7 @@ train:
  device: cpu

  reader:
-    batch_size: 2
+    batch_size: 1
    class: "{workspace}/census_reader.py"
    train_data_path: "{workspace}/data/train"


--- a/models/multitask/mmoe/model.py
+++ b/models/multitask/mmoe/model.py
@@ -23,7 +23,7 @@ class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)

-    def MMOE(self):
+    def MMOE(self, is_infer=False):

        feature_size = envs.get_global_env("hyper_parameters.feature_size", None, self._namespace)
        expert_num = envs.get_global_env("hyper_parameters.expert_num", None, self._namespace)
@@ -34,6 +34,10 @@ class Model(ModelBase):
        input_data = fluid.data(name="input", shape=[-1, feature_size], dtype="float32")
        label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
        label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
+        if is_infer:
+            self._infer_data_var = [input_data, label_income, label_marital]
+            self._infer_data_loader = fluid.io.DataLoader.from_generator(
+                    feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
        
        self._data_var.extend([input_data, label_income, label_marital])
        # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
@@ -75,14 +79,19 @@ class Model(ModelBase):
        pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
        pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)

-        cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
-        cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
        
        label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
        label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
        
        auc_income, batch_auc_1, auc_states_1  = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
        auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
+        if is_infer:
+            self._infer_results["AUC_income"] = auc_income
+            self._infer_results["AUC_marital"] = auc_marital
+            return
+
+        cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
+        cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
        
        avg_cost_income = fluid.layers.mean(x=cost_income)
        avg_cost_marital = fluid.layers.mean(x=cost_marital)
@@ -101,4 +110,4 @@ class Model(ModelBase):


    def infer_net(self):
-        pass
+        self.MMOE(is_infer=True)
--- a/models/multitask/share-bottom/census_infer_reader.py
+++ b/models/multitask/share-bottom/census_infer_reader.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs
+import numpy as np
+
+
+class EvaluateReader(Reader):
+    """
+    Reader that turns one comma-separated text line into the three named
+    slots "input", "label_income" and "label_marital".
+    """
+
+    def init(self):
+        """
+        No setup is performed.
+        """
+        pass
+
+    def generate_sample(self, line):
+        """
+        Build and return a generator function that parses ``line``.
+
+        ``line`` is not parsed here; parsing happens when the returned
+        ``reader`` callable is invoked and iterated.
+        """
+
+        def reader():
+            """
+            Parse ``line`` and yield a single sample.
+
+            Every comma-separated field is converted to float. Column 0 is the
+            marital label, column 1 is the income label, and the remaining
+            columns are the input features. The sample is yielded as
+            ``zip`` of the slot names with their values.
+            """
+            l = line.strip().split(',')
+            l = list(map(float, l))
+            # Labels start empty; a label value other than 0 or 1 leaves the
+            # corresponding list empty because neither branch below matches.
+            label_income = []
+            label_marital = []
+            # Everything after the two label columns is the feature vector.
+            data = l[2:]
+            # Expand each binary label into a two-element one-hot list:
+            # 0 -> [1, 0], 1 -> [0, 1].
+            if int(l[1]) == 0:
+                label_income = [1, 0]
+            elif int(l[1]) == 1:
+                label_income = [0, 1]
+            if int(l[0]) == 0:
+                label_marital = [1, 0]
+            elif int(l[0]) == 1:
+                label_marital = [0, 1]
+            # Slot names are paired positionally with the values below.
+            feature_name = ["input", "label_income", "label_marital"]
+            yield zip(feature_name, [data] + [label_income] + [label_marital])
+
+        return reader
--- a/models/multitask/share-bottom/config.yaml
+++ b/models/multitask/share-bottom/config.yaml
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+evaluate:
+  reader:
+    batch_size: 1
+    class: "{workspace}/census_infer_reader.py"
+    test_data_path: "{workspace}/data/train"
+
 train:
  trainer:
    # for cluster training

--- a/models/multitask/share-bottom/model.py
+++ b/models/multitask/share-bottom/model.py
@@ -23,7 +23,7 @@ class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)

-    def train(self):
+    def model(self, is_infer=False):

        feature_size = envs.get_global_env("hyper_parameters.feature_size", None, self._namespace)
        bottom_size = envs.get_global_env("hyper_parameters.bottom_size", None, self._namespace)
@@ -34,6 +34,11 @@ class Model(ModelBase):
        label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
        label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
        
+        if is_infer:
+            self._infer_data_var = [input_data, label_income, label_marital]
+            self._infer_data_loader = fluid.io.DataLoader.from_generator(
+                    feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
+
        self._data_var.extend([input_data, label_income, label_marital])

        bottom_output = fluid.layers.fc(input=input_data,
@@ -60,16 +65,19 @@ class Model(ModelBase):
        pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
        pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)

-        cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
-        cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
-        
-
        label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
        label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
        
        auc_income, batch_auc_1, auc_states_1  = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
        auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))

+        if is_infer:
+            self._infer_results["AUC_income"] = auc_income
+            self._infer_results["AUC_marital"] = auc_marital
+            return
+
+        cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
+        cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
        cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1)
        
        avg_cost =  fluid.layers.mean(x=cost)
@@ -82,8 +90,8 @@ class Model(ModelBase):


    def train_net(self):
-        self.train()
+        self.model()


    def infer_net(self):
-        pass
+        self.model(is_infer=True)