From 5d701fec791a463e78173bcc5c03661e0666f46f Mon Sep 17 00:00:00 2001 From: zhangwenhui03 Date: Fri, 15 May 2020 15:42:27 +0800 Subject: [PATCH] add share-bottom infer --- .../share-bottom/census_infer_reader.py | 50 +++++++++++++++++++ models/multitask/share-bottom/config.yaml | 6 +++ models/multitask/share-bottom/model.py | 22 +++++--- 3 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 models/multitask/share-bottom/census_infer_reader.py diff --git a/models/multitask/share-bottom/census_infer_reader.py b/models/multitask/share-bottom/census_infer_reader.py new file mode 100644 index 00000000..c25ccea8 --- /dev/null +++ b/models/multitask/share-bottom/census_infer_reader.py @@ -0,0 +1,50 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +from paddlerec.core.reader import Reader +from paddlerec.core.utils import envs +import numpy as np + + +class EvaluateReader(Reader): + def init(self): + pass + + def generate_sample(self, line): + """ + Read the data line by line and process it as a dictionary + """ + + def reader(): + """ + This function needs to be implemented by the user, based on data format + """ + l = line.strip().split(',') + l = list(map(float, l)) + label_income = [] + label_marital = [] + data = l[2:] + if int(l[1]) == 0: + label_income = [1, 0] + elif int(l[1]) == 1: + label_income = [0, 1] + if int(l[0]) == 0: + label_marital = [1, 0] + elif int(l[0]) == 1: + label_marital = [0, 1] + feature_name = ["input", "label_income", "label_marital"] + yield zip(feature_name, [data] + [label_income] + [label_marital]) + + return reader diff --git a/models/multitask/share-bottom/config.yaml b/models/multitask/share-bottom/config.yaml index 64d61ed4..591b6800 100644 --- a/models/multitask/share-bottom/config.yaml +++ b/models/multitask/share-bottom/config.yaml @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +evaluate: + reader: + batch_size: 1 + class: "{workspace}/census_infer_reader.py" + test_data_path: "{workspace}/data/train" + train: trainer: # for cluster training diff --git a/models/multitask/share-bottom/model.py b/models/multitask/share-bottom/model.py index 7328695f..a9172995 100644 --- a/models/multitask/share-bottom/model.py +++ b/models/multitask/share-bottom/model.py @@ -23,7 +23,7 @@ class Model(ModelBase): def __init__(self, config): ModelBase.__init__(self, config) - def train(self): + def model(self, is_infer=False): feature_size = envs.get_global_env("hyper_parameters.feature_size", None, self._namespace) bottom_size = envs.get_global_env("hyper_parameters.bottom_size", None, self._namespace) @@ -34,6 +34,11 @@ class Model(ModelBase): label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0) label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0) + if is_infer: + self._infer_data_var = [input_data, label_income, label_marital] + self._infer_data_loader = fluid.io.DataLoader.from_generator( + feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) + self._data_var.extend([input_data, label_income, label_marital]) bottom_output = fluid.layers.fc(input=input_data, @@ -60,16 +65,19 @@ class Model(ModelBase): pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) - cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) - cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) - - label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) + if is_infer: + self._infer_results["AUC_income"] = auc_income + self._infer_results["AUC_marital"] = auc_marital + return + + cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) + cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1) avg_cost = fluid.layers.mean(x=cost) @@ -82,8 +90,8 @@ class Model(ModelBase): def train_net(self): - self.train() + self.model() def infer_net(self): - pass + self.model(is_infer=True) -- GitLab