#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.initializer as initializer
from paddle.fluid import Program, program_guard

from op_test import OpTest


def nce(
    input, weight, bias, sample_weight, labels, num_classes, num_sample_class
):
    """NumPy reference implementation of the NCE forward computation.

    For every row of ``input`` the true classes listed in ``labels`` are
    evaluated first, followed by the fixed negative classes
    ``0 .. num_sample_class - 1``.

    Args:
        input: 2-D array; one row per example.
        weight: 2-D array indexed by class id; each row is dotted with an
            input row.
        bias: array indexed by class id, or ``None`` to skip the bias term.
        sample_weight: array indexed by example, or ``None`` to weight every
            example by 1.
        labels: 2-D integer array of true class ids, one row per example.
        num_classes: total number of classes (used for the noise prior).
        num_sample_class: number of negative classes evaluated per example.

    Returns:
        A 3-tuple ``(cost, sample_logits, sample_labels)`` where ``cost`` has
        shape ``(batch_size, 1)`` and the other two have shape
        ``(batch_size, num_true_class + num_sample_class)``.
        ``sample_logits`` holds the values after the sigmoid activation.
    """
    batch_size = input.shape[0]
    num_true_class = labels.shape[1]

    # One (row, class_id, is_true_class, weight) tuple per evaluated logit.
    samples = []
    sample_labels = []
    for i in range(batch_size):
        w = 1 if sample_weight is None else sample_weight[i]
        for label in labels[i]:
            samples.append((i, label, True, w))
            sample_labels.append(label)
        for num in range(num_sample_class):
            samples.append((i, num, False, w))
            sample_labels.append(num)

    # Forward bias and weight: logit = bias[class] + <input[row], weight[class]>.
    sample_out = np.zeros(len(samples)).astype(np.float32)
    for idx, (row, class_id, _, _) in enumerate(samples):
        if bias is not None:
            sample_out[idx] = bias[class_id]
        sample_out[idx] += np.dot(input[row], weight[class_id])

    # Forward activation (sigmoid).
    sample_out = 1.0 / (1.0 + np.exp(-sample_out))

    # Forward cost: accumulate the weighted per-sample cost into each row.
    out = np.zeros(batch_size).astype(np.float32)
    b = 1.0 / num_classes * num_sample_class
    for o, (row, _, is_true, w) in zip(sample_out, samples):
        cost = -np.log(o / (o + b)) if is_true else -np.log(b / (o + b))
        out[row] += cost * w

    return (
        out[:, np.newaxis],
        np.array(sample_out).reshape(
            batch_size, num_sample_class + num_true_class
        ),
        np.array(sample_labels).reshape(
            batch_size, num_sample_class + num_true_class
        ),
    )


class TestNCE(OpTest):
    """Operator test that checks the ``nce`` op against the NumPy ``nce``."""

    def generate_data(
        self,
        dim,
        batch_size,
        num_classes,
        num_true_class,
        num_neg_samples,
        is_sparse,
    ):
        """Fill ``self.attrs`` and ``self.inputs`` with random test data.

        Reads ``self.is_test``, so ``set_is_test`` must have been called
        before this method (``setUp`` does so).
        """
        # The order of the random draws is kept fixed so that a seeded run
        # produces the same data as before.
        features = np.random.randn(batch_size, dim).astype(np.float32)
        weight = np.random.randn(num_classes, dim).astype(np.float32)
        bias = np.random.randn(num_classes).astype(np.float32)
        sample_weight = np.random.randn(batch_size).astype(np.float32)
        labels = np.random.randint(
            0, num_classes, (batch_size, num_true_class)
        ).astype("int64")
        self.attrs = {
            'num_total_classes': num_classes,
            'num_neg_samples': num_neg_samples,
            # Fixed negatives 0..num_neg_samples-1, matching what the NumPy
            # reference ``nce`` evaluates.
            'custom_neg_classes': list(range(num_neg_samples)),
            'seed': 0,
            'sampler': 0,
            'is_sparse': is_sparse,
            'is_test': self.is_test,
        }
        self.inputs = {
            'Input': features,
            'Label': labels,
            'Weight': weight,
            'Bias': bias,
            'SampleWeight': sample_weight,
        }

    def set_is_test(self):
        """Select training mode; subclasses override to test inference."""
        self.is_test = False

    def set_data(self):
        """Generate the default data set; subclasses override the sizes."""
        self.generate_data(5, 25, 100, 1, 2, False)

    def compute(self):
        """Compute the expected outputs with the NumPy reference ``nce``."""
        out = nce(
            self.inputs['Input'],
            self.inputs['Weight'],
            self.inputs['Bias'],
            self.inputs['SampleWeight'],
            self.inputs['Label'],
            self.attrs['num_total_classes'],
            self.attrs['num_neg_samples'],
        )
        if self.is_test:
            # In test mode only the cost is registered as an expected output.
            self.outputs = {'Cost': out[0]}
        else:
            self.outputs = {
                'Cost': out[0],
                'SampleLogits': out[1],
                'SampleLabels': out[2],
            }

    def setUp(self):
        self.op_type = 'nce'
        self.set_is_test()
        self.set_data()
        self.compute()

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(
            ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02
        )


class TestNCECase1Tensor(TestNCE):
    """Same checks as ``TestNCE`` with a different data configuration."""

    def set_data(self):
        # Arguments are passed positionally to ``generate_data``:
        # dim, batch_size, num_classes, num_true_class, num_neg_samples,
        # is_sparse.
        self.generate_data(10, 20, 100, 2, 5, False)


class TestNCETensorIsTest(TestNCE):
    """Run the ``TestNCE`` output check with ``is_test`` enabled."""

    def set_is_test(self):
        self.is_test = True

    def test_check_grad(self):
        """Intentionally a no-op: with is_test = True there is no need to
        calculate grad."""


class TestNCECase1SelectedRows(unittest.TestCase):
    """Compare the loss of the NCE layer built with and without is_sparse."""

    def setUp(self):
        self.base_lr = 0.0001
        self.batch_size = 8

    @staticmethod
    def get_place():
        """Return the device the test runs on (CPU)."""
        place = fluid.core.CPUPlace()
        return place

    @staticmethod
    def get_train_data(batch_size):
        """Return ``batch_size`` random ``[input, labels]`` pairs.

        Each input is a float32 array of shape ``(batch_size, 10)`` and each
        labels array has shape ``(batch_size, 1)`` with values in ``[0, 20)``.
        """
        batches = []
        for _ in range(batch_size):
            features = np.random.randn(batch_size, 10).astype(np.float32)
            labels = np.random.randint(0, 20, (batch_size, 1))
            batches.append([features, labels])
        return batches

    def get_optimizer(self):
        """Return an SGD optimizer using ``self.base_lr``."""
        optimizer = fluid.optimizer.SGD(learning_rate=self.base_lr)
        return optimizer

    def train_network(
        self,
        num_total_classes,
        num_neg_samples,
        sampler,
        custom_dist,
        is_sparse,
    ):
        """Build the NCE training network in the current default program.

        Returns:
            ``[avg_cost, [input, label]]``: the mean NCE cost variable and the
            list of data variables to feed.
        """
        input = fluid.layers.data(name="input", shape=[10], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")

        # Parameters are created explicitly under fixed names so the NCE layer
        # below can refer to them through param_attr / bias_attr.
        w_param = (
            fluid.default_main_program()
            .global_block()
            .create_parameter(
                shape=[num_total_classes, 10],
                dtype='float32',
                name='nce_w',
                initializer=initializer.ConstantInitializer(),
            )
        )
        b_param = (
            fluid.default_main_program()
            .global_block()
            .create_parameter(
                shape=[num_total_classes, 1],
                dtype='float32',
                name='nce_b',
                initializer=initializer.ConstantInitializer(),
            )
        )

        cost = paddle.static.nn.nce(
            input=input,
            label=label,
            num_total_classes=num_total_classes,
            sampler=sampler,
            custom_dist=custom_dist,
            sample_weight=None,
            param_attr='nce_w',
            bias_attr='nce_b',
            seed=1,
            num_neg_samples=num_neg_samples,
            is_sparse=is_sparse,
        )
        avg_cost = paddle.mean(cost)
        # optimizer
        optimizer = self.get_optimizer()
        optimizer.minimize(avg_cost)

        return [avg_cost, [input, label]]

    def _run_once(self, exe, place, data, nid_freq_arr, is_sparse):
        """Build and run the network once in a fresh scope; return mean loss."""
        scope = fluid.core.Scope()
        startup_program = fluid.framework.Program()
        train_program = fluid.framework.Program()
        with fluid.scope_guard(scope):
            with fluid.program_guard(train_program, startup_program):
                cost, feeds = self.train_network(
                    20, 5, "custom_dist", nid_freq_arr.tolist(), is_sparse
                )
                feeder = fluid.DataFeeder(feed_list=feeds, place=place)
                exe.run(startup_program)
                loss_val = exe.run(
                    train_program,
                    feed=feeder.feed(data),
                    fetch_list=[cost.name],
                )
        return np.mean(loss_val)

    def test_input_is_selected_rows(self):
        place = self.get_place()
        exe = fluid.Executor(place)

        data = self.get_train_data(self.batch_size)
        nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')

        # Same data, same sampling distribution; only is_sparse differs.
        dense_loss = self._run_once(exe, place, data, nid_freq_arr, False)
        sparse_loss = self._run_once(exe, place, data, nid_freq_arr, True)

        # Exact equality is kept from the original test.
        self.assertEqual(dense_loss, sparse_loss)


class TestNCE_OpError(unittest.TestCase):
    """Check that ``paddle.static.nn.nce`` raises TypeError on bad inputs."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            input1 = fluid.create_lod_tensor(
                np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
            )
            label1 = fluid.layers.data(
                name='label1', shape=[-1, 4], dtype="int64"
            )
            # the input(input) of nce layer must be Variable.
            self.assertRaises(
                TypeError, paddle.static.nn.nce, input1, label1, 5
            )

            input2 = fluid.layers.data(
                name='input2', shape=[-1, 4], dtype="float32"
            )
            label2 = fluid.create_lod_tensor(
                np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
            )
            # the input(label) of nce layer must be Variable.
            self.assertRaises(
                TypeError, paddle.static.nn.nce, input2, label2, 5
            )

            input3 = fluid.layers.data(
                name='input3', shape=[-1, 4], dtype="float16"
            )
            label3 = fluid.layers.data(
                name='label3', shape=[-1, 1], dtype="int64"
            )
            # the data type of input(input) must be float32 or float64.
            self.assertRaises(
                TypeError, paddle.static.nn.nce, input3, label3, 5
            )

            input4 = fluid.layers.data(
                name='input4', shape=[-1, 4], dtype="float32"
            )
            label4 = fluid.layers.data(
                name='label4', shape=[-1, 1], dtype="int32"
            )
            # the data type of input(label) must be int64.
            self.assertRaises(
                TypeError, paddle.static.nn.nce, input4, label4, 5
            )


class TestDygraphNCE_OpError(unittest.TestCase):
    """Check that the ``fluid.NCE`` layer raises TypeError on bad inputs."""

    def test_NCE_errors(self):
        with program_guard(Program(), Program()):
            # Named nce_layer so it does not shadow the module-level ``nce``
            # reference function.
            nce_layer = fluid.NCE(20, 5)
            input1 = fluid.create_lod_tensor(
                np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
            )
            label1 = fluid.layers.data(
                name='label1', shape=[-1, 4], dtype="int64"
            )
            # the input(input) of NCE layer must be Variable.
            self.assertRaises(TypeError, nce_layer, input1, label1)

            input2 = fluid.layers.data(
                name='input2', shape=[-1, 4], dtype="float32"
            )
            label2 = fluid.create_lod_tensor(
                np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
            )
            # the input(label) of NCE layer must be Variable.
            self.assertRaises(TypeError, nce_layer, input2, label2)

            input3 = fluid.layers.data(
                name='input3', shape=[-1, 4], dtype="float16"
            )
            label3 = fluid.layers.data(
                name='label3', shape=[-1, 1], dtype="int64"
            )
            # the data type of input(input) must be float32 or float64.
            self.assertRaises(TypeError, nce_layer, input3, label3)

            input4 = fluid.layers.data(
                name='input4', shape=[-1, 4], dtype="float32"
            )
            label4 = fluid.layers.data(
                name='label4', shape=[-1, 1], dtype="int32"
            )
            # the data type of input(label) must be int64.
            self.assertRaises(TypeError, nce_layer, input4, label4)

            input5 = fluid.layers.data(
                name='input5', shape=[-1, 4], dtype="float32"
            )
            label5 = fluid.layers.data(
                name='label5', shape=[-1, 1], dtype="int64"
            )
            sample_weight = fluid.create_lod_tensor(
                np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
            )
            # the sample_weight of nce must be Variable or None.
            self.assertRaises(
                TypeError, nce_layer, input5, label5, sample_weight
            )


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()