# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np


def _one_hot_binary(value):
    """Encode a 0/1 label as a two-element one-hot list.

    The value is truncated with ``int()`` first. ``0`` maps to ``[1, 0]`` and
    ``1`` maps to ``[0, 1]``. Any other value yields an empty list; no
    validation is performed here.
    """
    flag = int(value)
    if flag == 0:
        return [1, 0]
    if flag == 1:
        return [0, 1]
    return []


class EvaluateReader(Reader):
    """Reader that turns comma-separated census rows into MMOE samples.

    Each input line is parsed as floats. Column 0 is the marital label,
    column 1 is the income label, and every remaining column is a feature.
    """

    def init(self):
        """Nothing to initialise for this reader."""
        pass

    def generate_sample(self, line):
        """Build a sample generator for one raw text line.

        Args:
            line: one comma-separated row of numeric values.

        Returns:
            A zero-argument generator function. It yields a single sample:
            a list of ``(slot_name, values)`` pairs for the slots ``"input"``,
            ``"label_income"`` and ``"label_marital"``, in that order.

        Raises:
            ValueError: if a field cannot be parsed as a float.
            IndexError: if the row has fewer than two columns.
        """

        def reader():
            """Parse the captured line and yield its single sample."""
            values = [float(field) for field in line.strip().split(',')]

            features = values[2:]
            label_income = _one_hot_binary(values[1])
            label_marital = _one_hot_binary(values[0])

            feature_name = ["input", "label_income", "label_marital"]
            # zip() is a one-shot iterator on Python 3. Materialise it so the
            # sample is a real list of pairs that can be indexed or iterated
            # more than once.
            # NOTE(review): Paddle's data generator is understood to require a
            # list/tuple sample - confirm against the installed version.
            yield list(
                zip(feature_name, [features, label_income, label_marital]))

        return reader
b/models/multitask/mmoe/config.yaml @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +evaluate: + reader: + batch_size: 1 + class: "{workspace}/census_infer_reader.py" + test_data_path: "{workspace}/data/train" + train: trainer: # for cluster training @@ -22,7 +28,7 @@ train: device: cpu reader: - batch_size: 2 + batch_size: 1 class: "{workspace}/census_reader.py" train_data_path: "{workspace}/data/train" diff --git a/models/multitask/mmoe/model.py b/models/multitask/mmoe/model.py index bdbbba090a5e236b23c12d9bbfc3cc4a3c13b173..753bacae77f089dc709e34812ef109ac06aebcfb 100644 --- a/models/multitask/mmoe/model.py +++ b/models/multitask/mmoe/model.py @@ -23,7 +23,7 @@ class Model(ModelBase): def __init__(self, config): ModelBase.__init__(self, config) - def MMOE(self): + def MMOE(self, is_infer=False): feature_size = envs.get_global_env("hyper_parameters.feature_size", None, self._namespace) expert_num = envs.get_global_env("hyper_parameters.expert_num", None, self._namespace) @@ -34,6 +34,10 @@ class Model(ModelBase): input_data = fluid.data(name="input", shape=[-1, feature_size], dtype="float32") label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0) label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0) + if is_infer: + self._infer_data_var = [input_data, label_income, label_marital] + self._infer_data_loader = fluid.io.DataLoader.from_generator( + feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) self._data_var.extend([input_data, label_income, label_marital]) # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper @@ -75,14 +79,19 @@ class Model(ModelBase): pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) - cost_income = 
fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) - cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) + if is_infer: + self._infer_results["AUC_income"] = auc_income + self._infer_results["AUC_marital"] = auc_marital + return + + cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) + cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) avg_cost_income = fluid.layers.mean(x=cost_income) avg_cost_marital = fluid.layers.mean(x=cost_marital) @@ -101,4 +110,4 @@ class Model(ModelBase): def infer_net(self): - pass + self.MMOE(is_infer=True)