layers.py 9.5 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

X
xiexionghang 已提交
15
import paddle.fluid as fluid
T
rename  
tangwei 已提交
16
from fleetrec.core.layer import Layer
X
xiexionghang 已提交
17

X
xiexionghang 已提交
18

X
xiexionghang 已提交
19
class EmbeddingInputLayer(Layer):
    """Input layer that turns sparse slot ids into pooled embeddings.

    For every configured slot a LoD ``int64`` data variable is declared,
    looked up in a shared sparse, distributed embedding table (parameter
    name ``"embedding"``), sum-pooled over the sequence and passed through
    ``fluid.layers.continuous_value_model`` together with the show/click
    tensor. The per-slot results are concatenated along axis 1.
    """

    def __init__(self, config):
        """Read the layer settings from ``config``.

        Args:
            config: mapping that must provide ``cvm``, ``name``, ``slots``,
                ``mf_dim`` and ``backward``. Slot identifiers are converted
                to strings because they are used as data-variable names.
        """
        self._cvm = config['cvm']
        self._name = config['name']
        self._slots = [str(slot) for slot in config['slots']]
        self._mf_dim = config['mf_dim']
        self._backward = config['backward']
        self._emb_dim = self._mf_dim + 3  # append show ctr lr
        self._emb_layers = []

    def generate_fluid(self, param):
        """Build the embedding sub-graph for all slots.

        Args:
            param: mapping whose ``'layer'`` entry holds the already built
                layers; the ``'show'`` and ``'click'`` entries are read.

        Returns:
            A tuple ``(output, extra)`` where ``output`` is the axis-1
            concatenation of all slot embeddings and ``extra`` is
            ``{'data_var': [...]}`` listing the declared slot inputs in
            slot order.
        """
        show_clk = fluid.layers.concat(
            [param['layer']['show'], param['layer']['click']], axis=1)
        show_clk.stop_gradient = True

        # Start from an empty list on every call: appending to the list
        # created in __init__ would make a second invocation concatenate
        # the embeddings left over from the first one as well.
        self._emb_layers = []
        data_var = []
        for slot in self._slots:
            slot_ids = fluid.layers.data(
                name=slot, shape=[1], dtype="int64", lod_level=1)
            data_var.append(slot_ids)
            # Every slot uses the same parameter name, so all slots share
            # one embedding table.
            emb = fluid.layers.embedding(
                input=slot_ids, size=[10, self._emb_dim],
                is_sparse=True, is_distributed=True,
                param_attr=fluid.ParamAttr(name="embedding"))
            emb = fluid.layers.sequence_pool(input=emb, pool_type='sum')
            emb = fluid.layers.continuous_value_model(emb, show_clk, self._cvm)
            self._emb_layers.append(emb)
        output = fluid.layers.concat(
            input=self._emb_layers, axis=1, name=self._name)
        return output, {'data_var': data_var}
X
xiexionghang 已提交
52

X
xiexionghang 已提交
53

X
xiexionghang 已提交
54
class LabelInputLayer(Layer):
    """Input layer that declares a label variable and exposes it as float32."""

    def __init__(self, config):
        """Read the layer settings from ``config``.

        Args:
            config: mapping that must provide ``name`` and ``label_idx``;
                ``dim`` defaults to 1 and ``data_type`` to ``"int64"``.
        """
        self._name = config['name']
        self._dim = config.get('dim', 1)
        self._data_type = config.get('data_type', "int64")
        self._label_idx = config['label_idx']

    def generate_fluid(self, param):
        """Declare the label input and cast it to float32.

        Args:
            param: unused by this layer.

        Returns:
            A tuple ``(cast_label, extra)``: the float32 cast of the label
            with gradients stopped, and ``{'data_var': [label]}`` holding
            the raw (uncast) data variable.
        """
        raw_label = fluid.layers.data(
            name=self._name,
            shape=[-1, self._dim],
            dtype=self._data_type,
            lod_level=0,
            append_batch_size=False)
        float_label = fluid.layers.cast(raw_label, dtype='float32')
        float_label.stop_gradient = True
        extra = {'data_var': [raw_label]}
        return float_label, extra

X
xiexionghang 已提交
75

T
tangwei 已提交
76
class TagInputLayer(Layer):
    """Input layer that declares a dense tag variable with gradients stopped."""

    def __init__(self, config):
        """Read the layer settings from ``config``.

        Args:
            config: mapping that must provide ``name``, ``tag`` and
                ``data_type``; ``dim`` defaults to 1.
        """
        self._name = config['name']
        self._tag = config['tag']
        self._dim = config.get('dim', 1)
        self._data_type = config['data_type']

    def generate_fluid(self, param):
        """Declare the tag data variable.

        Args:
            param: unused by this layer.

        Returns:
            A tuple ``(output, extra)`` where ``output`` is the declared
            data variable and ``extra`` is ``{'data_var': [output]}``.
        """
        tag_var = fluid.layers.data(
            name=self._name,
            shape=[-1, self._dim],
            dtype=self._data_type,
            lod_level=0,
            append_batch_size=False,
            stop_gradient=True)
        extra = {'data_var': [tag_var]}
        return tag_var, extra

T
tangwei 已提交
95 96

class ParamLayer(Layer):
    """Pseudo layer that carries a parameter configuration, not a tensor."""

    def __init__(self, config):
        """Read the layer settings from ``config`` and keep the mapping itself.

        Args:
            config: mapping that must provide ``name`` and ``coln``;
                ``table_id`` defaults to -1, ``init_range`` to 1 and
                ``data_type`` to ``'float32'``.
        """
        self._name = config['name']
        self._coln = config['coln']
        self._table_id = config.get('table_id', -1)
        self._init_range = config.get('init_range', 1)
        self._data_type = config.get('data_type', 'float32')
        self._config = config

    def generate_fluid(self, param):
        """Return the stored configuration; no fluid ops are created.

        Args:
            param: unused by this layer.

        Returns:
            A tuple ``(config, extra)`` where ``config`` is the mapping given
            to the constructor and ``extra`` holds an ``'inference_param'``
            entry named ``'param'`` with an empty parameter list and this
            layer's table id.
        """
        inference_param = {
            'name': 'param',
            'params': [],
            'table_id': self._table_id,
        }
        return self._config, {'inference_param': inference_param}
X
xiexionghang 已提交
114

X
xiexionghang 已提交
115

T
tangwei 已提交
116
class SummaryLayer(Layer):
    """Pseudo layer that carries a summary configuration, not a tensor."""

    def __init__(self, config):
        """Read the layer settings from ``config`` and keep the mapping itself.

        Args:
            config: mapping that must provide ``name``; ``table_id``
                defaults to -1 and ``data_type`` to ``'float32'``.
        """
        self._name = config['name']
        self._table_id = config.get('table_id', -1)
        self._data_type = config.get('data_type', 'float32')
        self._config = config

    def generate_fluid(self, param):
        """Return the stored configuration; no fluid ops are created.

        Args:
            param: unused by this layer.

        Returns:
            A tuple ``(config, extra)`` where ``config`` is the mapping given
            to the constructor and ``extra`` holds an ``'inference_param'``
            entry named ``'summary'`` with an empty parameter list and this
            layer's table id.
        """
        inference_param = {
            'name': 'summary',
            'params': [],
            'table_id': self._table_id,
        }
        return self._config, {'inference_param': inference_param}
X
xiexionghang 已提交
132

X
xiexionghang 已提交
133

T
tangwei 已提交
134
class NormalizetionLayer(Layer):
    """Layer that applies ``fluid.layers.data_norm`` to one or more inputs."""

    def __init__(self, config):
        """Read the layer settings from ``config``.

        Args:
            config: mapping that must provide ``name``, ``input`` (a sequence
                of layer names) and ``summary`` (the name of the layer whose
                entry supplies the table id); ``table_id`` defaults to -1.
        """
        self._name = config['name']
        self._input = config['input']
        self._summary = config['summary']
        self._table_id = config.get('table_id', -1)

    def generate_fluid(self, param):
        """Build the data-norm op over the configured inputs.

        Args:
            param: mapping whose ``'layer'`` entry holds the already built
                layers, keyed by layer name.

        Returns:
            A tuple ``(bn, extra)`` where ``bn`` is the data-norm output and
            ``extra`` holds an ``'inference_param'`` entry named
            ``'summary'`` listing the three statistics parameters and the
            table id read from the summary layer entry.

        Raises:
            IndexError: if the configured input list is empty.
        """
        input_list = [param['layer'][i] for i in self._input]
        summary_layer = param['layer'][self._summary]
        # A single input is used directly. The previous ``> 0`` guard was
        # always true and wrapped even a lone input in a one-element concat.
        input_layer = input_list[0]
        if len(input_list) > 1:
            input_layer = fluid.layers.concat(input=input_list, axis=1)
        # NOTE(review): data_norm appears to read the key "batch_sum", not
        # "batch_sum_default"; 0.0 is also its default, so the result should
        # be the same either way - confirm against the Paddle source.
        bn = fluid.layers.data_norm(
            input=input_layer, name=self._name, epsilon=1e-4,
            param_attr={
                "batch_size": 1e4,
                "batch_sum_default": 0.0,
                "batch_square": 1e4})
        inference_param = [
            self._name + '.batch_size',
            self._name + '.batch_sum',
            self._name + '.batch_square_sum']
        return bn, {'inference_param': {
            'name': 'summary',
            'params': inference_param,
            'table_id': summary_layer.get('table_id', -1)}}
X
xiexionghang 已提交
159

X
xiexionghang 已提交
160

T
tangwei 已提交
161
class NeuralLayer(Layer):
    """Fully connected layer built with ``fluid.layers.fc``."""

    def __init__(self, config):
        """Read the layer settings from ``config``.

        Args:
            config: mapping that must provide ``name``, ``param`` (the name
                of the layer entry holding ``init_range``/``coln``) and
                ``input`` (a sequence of layer names); ``bias`` defaults to
                True and ``act_func`` to None.
        """
        self._name = config['name']
        self._param = config['param']
        self._input = config['input']
        self._bias = config.get('bias', True)
        self._act_func = config.get('act_func', None)

    def generate_fluid(self, param):
        """Build the fc op over the configured inputs.

        Weights (and the bias, when enabled) use a normal initializer with
        ``scale = init_range / sqrt(input width)``.

        Args:
            param: mapping whose ``'layer'`` entry holds the already built
                layers, keyed by layer name.

        Returns:
            A tuple ``(fc, extra)`` where ``fc`` is the layer output and
            ``extra`` holds an ``'inference_param'`` entry named ``'param'``
            listing the ``.w_0``/``.b_0`` parameter names and the table id
            read from the parameter layer entry.

        Raises:
            IndexError: if the configured input list is empty.
        """
        param_layer = param['layer'][self._param]
        input_list = [param['layer'][i] for i in self._input]
        # A single input is used directly. The previous ``> 0`` guard was
        # always true and wrapped even a lone input in a one-element concat.
        input_layer = input_list[0]
        if len(input_list) > 1:
            input_layer = fluid.layers.concat(input=input_list, axis=1)
        input_coln = input_layer.shape[1]
        scale = param_layer['init_range'] / (input_coln ** 0.5)
        # NOTE(review): per the fluid.layers.fc docs, bias_attr=None still
        # creates a default bias (only False disables it), so ``bias: False``
        # in the config presumably does not remove the bias. Left unchanged
        # to keep existing parameter layouts - confirm the intent.
        bias = None
        if self._bias:
            bias = fluid.ParamAttr(
                learning_rate=1.0,
                initializer=fluid.initializer.NormalInitializer(
                    loc=0.0, scale=scale))
        fc = fluid.layers.fc(
            name=self._name,
            input=input_layer,
            size=param_layer['coln'],
            act=self._act_func,
            param_attr=fluid.ParamAttr(
                learning_rate=1.0,
                initializer=fluid.initializer.NormalInitializer(
                    loc=0.0, scale=scale)),
            bias_attr=bias)
        inference_param = [self._name + '.w_0', self._name + '.b_0']
        return fc, {'inference_param': {
            'name': 'param',
            'params': inference_param,
            'table_id': param_layer.get('table_id', -1)}}
X
xiexionghang 已提交
200

X
xiexionghang 已提交
201

X
xiexionghang 已提交
202
class SigmoidLossLayer(Layer):
    """Sigmoid + log-loss output layer that also builds AUC and CTR metrics."""

    def __init__(self, config):
        """Read the layer settings and prepare the metric placeholders.

        Args:
            config: mapping that must provide ``name``, ``label`` and
                ``input``; ``weight`` and ``metric_label`` default to None
                and ``bound`` to ``[-15.0, 15.0]``.
        """
        self._name = config['name']
        self._label = config['label']
        self._input = config['input']
        self._weight = config.get('weight', None)
        self._metric_label = config.get('metric_label', None)
        self._bound = config.get('bound', [-15.0, 15.0])

        # Metrics whose placeholder additionally records an int64 data type.
        int64_metrics = (
            'stat_pos', 'stat_neg', 'batch_stat_pos', 'batch_stat_neg')
        metric_names = ('auc', 'batch_auc') + int64_metrics + (
            'pos_ins_num', 'abserr', 'sqrerr', 'prob', 'total_ins_num', 'q')
        metric_dict = {}
        for metric_name in metric_names:
            placeholder = {'var': None}
            if metric_name in int64_metrics:
                placeholder['data_type'] = 'int64'
            metric_dict[metric_name] = placeholder
        self._extend_output = {
            'metric_label': self._metric_label,
            'metric_dict': metric_dict,
        }

    def generate_fluid(self, param):
        """Build the loss and metric ops.

        The first configured input is clipped to ``bound``, passed through a
        sigmoid and scored with log loss against the label cast to float32.
        If a weight layer is configured, the loss is multiplied by it before
        the mean is taken.

        Args:
            param: mapping whose ``'layer'`` entry holds the already built
                layers, keyed by layer name.

        Returns:
            A tuple ``(norm, extend_output)`` where ``norm`` is the sigmoid
            output and ``extend_output`` is the instance dictionary with
            ``'loss'`` and every ``metric_dict[...]['var']`` filled in.
        """
        layers = param['layer']
        logits = layers[self._input[0]]
        label_layer = layers[self._label]

        lower, upper = self._bound[0], self._bound[1]
        clipped = fluid.layers.clip(logits, lower, upper, name=self._name)
        norm = fluid.layers.sigmoid(clipped, name=self._name)

        loss_label = fluid.layers.cast(x=label_layer, dtype='float32')
        loss = fluid.layers.log_loss(norm, loss_label)
        if self._weight:
            loss = fluid.layers.elementwise_mul(loss, layers[self._weight])
        loss = fluid.layers.mean(x=loss)
        self._extend_output['loss'] = loss

        # For AUC Metric
        metric = self._extend_output['metric_dict']
        complement = fluid.layers.elementwise_sub(
            fluid.layers.ceil(norm), norm)
        binary_predict = fluid.layers.concat(
            input=[complement, norm], axis=1)
        auc_label = fluid.layers.cast(x=label_layer, dtype='int64')
        auc_var, batch_auc_var, auc_states = fluid.layers.auc(
            input=binary_predict, label=auc_label,
            curve='ROC', num_thresholds=32)
        batch_stat_pos, batch_stat_neg, stat_pos, stat_neg = auc_states
        metric['auc']['var'] = auc_var
        metric['batch_auc']['var'] = batch_auc_var
        metric['batch_stat_pos']['var'] = batch_stat_pos
        metric['batch_stat_neg']['var'] = batch_stat_neg
        metric['stat_pos']['var'] = stat_pos
        metric['stat_neg']['var'] = stat_neg

        # A second, separate float32 cast of the label feeds the CTR bundle.
        ctr_label = fluid.layers.cast(x=label_layer, dtype='float32')
        sqrerr, abserr, prob, q, pos_ins_num, total_ins_num = \
            fluid.contrib.layers.ctr_metric_bundle(norm, ctr_label)
        metric['sqrerr']['var'] = sqrerr
        metric['abserr']['var'] = abserr
        metric['prob']['var'] = prob
        metric['q']['var'] = q
        metric['pos_ins_num']['var'] = pos_ins_num
        metric['total_ins_num']['var'] = total_ins_num

        return norm, self._extend_output