model.py 8.2 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

T
tangwei 已提交
15
import abc
C
Chengmo 已提交
16
import os
T
tangwei 已提交
17
import paddle.fluid as fluid
M
malin10 已提交
18
from paddle.fluid.framework import Variable
T
tangwei 已提交
19

M
malin10 已提交
20
from paddlerec.core.metric import Metric
21
from paddlerec.core.utils import envs
T
tangwei 已提交
22

T
tangwei 已提交
23

C
Chengmo 已提交
24
class ModelBase(object):
    """Base class for PaddleRec models.

    Holds shared program state -- input Variables, the training cost,
    train/infer fetch targets and data loaders -- and provides helpers to
    build slot-based inputs and the optimizer from the global yaml
    configuration (read through ``envs``). Subclasses implement ``net``
    and may override the other hooks.
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self, config):
        """Store the raw configuration and initialize empty program state.

        Args:
            config: parsed yaml configuration dict (kept as ``self._env``).
        """
        self._cost = None  # training loss Variable, set by the subclass
        self._metrics = {}  # train fetch targets: key -> Metric or Variable
        self._data_var = []  # input Variables of the train program
        self._infer_data_var = []  # input Variables of the infer program
        self._infer_results = {}  # infer fetch targets: key -> Metric or Variable
        self._data_loader = None
        self._infer_data_loader = None
        self._fetch_interval = 20  # print fetch targets every N batches
        self._platform = envs.get_platform()
        self._init_hyper_parameters()
        self._env = config
        self._slot_inited = False  # guards _init_slots against re-entry
        self._clear_metrics = None  # lazy cache for get_clear_metrics

    def _init_hyper_parameters(self):
        # Hook for subclasses: read hyper parameters from the global env.
        pass

    def _init_slots(self, **kargs):
        """Create input Variables for the sparse/dense slots of the dataset
        bound to the phase named ``kargs["name"]``.

        Fills ``self._data_var`` plus ``self._dense_data_var`` /
        ``self._sparse_data_var``, and creates a DataLoader when the dataset
        type is "DataLoader". Idempotent: only the first call has effect.
        """
        if self._slot_inited:
            return
        self._slot_inited = True

        # Locate the phase entry matching the requested name, then the
        # dataset entry that phase refers to.
        model_dict = {}
        for phase in self._env["phase"]:
            if phase["name"] == kargs["name"]:
                model_dict = phase
                break
        dataset = {}
        for ds in self._env["dataset"]:
            if ds["name"] == model_dict["dataset_name"]:
                dataset = ds
                break

        name = "dataset." + dataset["name"] + "."
        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
        if sparse_slots != "" or dense_slots != "":
            sparse_slots = sparse_slots.split(" ") if sparse_slots else []
            dense_slots = dense_slots.split(" ") if dense_slots else []
            # Each dense slot is declared as "slot_name:[d1,d2,...]".
            dense_slots_shape = [[
                int(j) for j in i.split(":")[1].strip("[]").split(",")
            ] for i in dense_slots]
            dense_slots = [i.split(":")[0] for i in dense_slots]
            self._dense_data_var = []
            for i in range(len(dense_slots)):
                l = fluid.layers.data(
                    name=dense_slots[i],
                    shape=dense_slots_shape[i],
                    dtype="float32")
                self._data_var.append(l)
                self._dense_data_var.append(l)
            self._sparse_data_var = []
            # BUGFIX: the loop variable used to be called ``name``, which
            # shadowed the dataset prefix above, so the ``name + "type"``
            # lookup below read the wrong key whenever sparse slots existed.
            for slot in sparse_slots:
                l = fluid.layers.data(
                    name=slot, shape=[1], lod_level=1, dtype="int64")
                self._data_var.append(l)
                self._sparse_data_var.append(l)

        dataset_class = envs.get_global_env(name + "type")
        if dataset_class == "DataLoader":
            self._init_dataloader()

    def _init_dataloader(self, is_infer=False):
        """Create a DataLoader feeding either the train or infer inputs.

        NOTE(review): the loader is always stored in ``self._data_loader``,
        even for is_infer=True (``self._infer_data_loader`` stays None).
        Looks suspicious, but kept as-is for compatibility with callers.
        """
        data = self._infer_data_var if is_infer else self._data_var
        self._data_loader = fluid.io.DataLoader.from_generator(
            feed_list=data,
            capacity=64,
            use_double_buffer=False,
            iterable=False)

    def get_inputs(self):
        """Return the train-program input Variables."""
        return self._data_var

    def get_infer_inputs(self):
        """Return the infer-program input Variables."""
        return self._infer_data_var

    def get_clear_metrics(self):
        """Return (and cache) the Metric objects among the infer results,
        i.e. the metrics whose state must be cleared between passes."""
        if self._clear_metrics is not None:
            return self._clear_metrics
        self._clear_metrics = [
            value for value in self._infer_results.values()
            if isinstance(value, Metric)
        ]
        return self._clear_metrics

    def get_infer_results(self):
        """Flatten infer fetch targets into a {name: Variable} dict.

        Metric entries contribute their own result dict; plain Variables
        are kept under their configured key. Anything else is dropped.
        """
        res = dict()
        for key, value in self._infer_results.items():
            if isinstance(value, Metric):
                res.update(value.get_result())
            elif isinstance(value, Variable):
                res[key] = value
        return res

    def get_avg_cost(self):
        """Return the training loss Variable (set by the subclass's net)."""
        return self._cost

    def get_metrics(self):
        """Flatten train fetch targets into a {name: Variable} dict,
        mirroring ``get_infer_results``."""
        res = dict()
        for key, value in self._metrics.items():
            if isinstance(value, Metric):
                res.update(value.get_result())
            elif isinstance(value, Variable):
                res[key] = value
        return res

    def get_fetch_period(self):
        """Return how often (in batches) fetch targets are printed."""
        return self._fetch_interval

    def _build_optimizer(self, name, lr, strategy=None):
        """Instantiate the fluid optimizer named in the configuration.

        Args:
            name: optimizer class name, case-insensitive (SGD/Adam/Adagrad).
            lr: learning rate, a float or a Variable.
            strategy: accepted for interface compatibility; unused here.

        Raises:
            ValueError: for any unsupported optimizer name.
        """
        name = name.upper()
        if name not in ("SGD", "ADAM", "ADAGRAD"):
            raise ValueError(
                "configured optimizer can only supported SGD/Adam/Adagrad")

        # The distributed communicator behaves differently under plain SGD.
        os.environ["FLAGS_communicator_is_sgd_optimizer"] = (
            '1' if name == "SGD" else '0')

        if name == "SGD":
            optimizer_i = fluid.optimizer.SGD(lr)
        elif name == "ADAM":
            optimizer_i = fluid.optimizer.Adam(lr, lazy_mode=True)
        else:  # ADAGRAD -- membership was validated above
            optimizer_i = fluid.optimizer.Adagrad(lr)

        return optimizer_i

    def optimizer(self):
        """Build the optimizer described under ``hyper_parameters.optimizer``
        in the config, coercing a string/int learning rate to float."""
        opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
        opt_lr = envs.get_global_env(
            "hyper_parameters.optimizer.learning_rate")
        if not isinstance(opt_lr, (float, Variable)):
            try:
                opt_lr = float(opt_lr)
            except ValueError:
                raise ValueError(
                    "In your config yaml, 'learning_rate': %s must be written as a floating piont number,such as 0.001 or 1e-3"
                    % opt_lr)
        opt_strategy = envs.get_global_env(
            "hyper_parameters.optimizer.strategy")

        return self._build_optimizer(opt_name, opt_lr, opt_strategy)

    def input_data(self, is_infer=False, **kwargs):
        """Create and return slot input Variables for the dataset named by
        ``kwargs["dataset_name"]``, or None when no slots are configured.

        Also fills ``self._dense_data_var(_map)`` and
        ``self._sparse_data_var(_map)`` as side effects.
        """
        name = "dataset." + kwargs.get("dataset_name") + "."
        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
        self._sparse_data_var_map = {}
        self._dense_data_var_map = {}
        if sparse_slots == "" and dense_slots == "":
            return None

        sparse_slots = sparse_slots.split(" ") if sparse_slots else []
        dense_slots = dense_slots.split(" ") if dense_slots else []
        # Each dense slot is declared as "slot_name:[d1,d2,...]".
        dense_slots_shape = [[
            int(j) for j in i.split(":")[1].strip("[]").split(",")
        ] for i in dense_slots]
        dense_slots = [i.split(":")[0] for i in dense_slots]
        data_var_ = []
        self._dense_data_var = []
        for i in range(len(dense_slots)):
            l = fluid.layers.data(
                name=dense_slots[i],
                shape=dense_slots_shape[i],
                dtype="float32")
            data_var_.append(l)
            self._dense_data_var.append(l)
            self._dense_data_var_map[dense_slots[i]] = l
        self._sparse_data_var = []
        for slot in sparse_slots:
            l = fluid.layers.data(
                name=slot, shape=[1], lod_level=1, dtype="int64")
            data_var_.append(l)
            self._sparse_data_var.append(l)
            self._sparse_data_var_map[slot] = l
        return data_var_

    def net(self, is_infer=False):
        """Subclass hook: build the network; returns fetch targets or None."""
        return None

    def train_net(self):
        """Subclass hook for building the training program."""
        pass

    def infer_net(self):
        """Subclass hook for building the inference program."""
        pass