# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import unittest

import numpy as np
from test_lac import DynamicGRU

import paddle
from paddle import fluid
from paddle.fluid.dygraph import to_variable
from paddle.jit.api import to_static
from paddle.nn import Embedding, Linear

SEED = 2020

# Note: set FLAGS_cudnn_deterministic to eliminate randomness:
#     for a single operation, cuDNN provides several algorithms, and some of
#     them (e.g. convolution algorithms) produce non-deterministic results.
if fluid.is_compiled_with_cuda():
    fluid.set_flags({'FLAGS_cudnn_deterministic': True})


class SimpleConvPool(paddle.nn.Layer):
    def __init__(
        self,
        num_channels,
        num_filters,
        filter_size,
        use_cudnn=True,
        batch_size=None,
    ):
        super().__init__()
        self.batch_size = batch_size
        self._conv2d = paddle.nn.Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            padding=[1, 1],
        )

    def forward(self, inputs):
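        # Apply Conv2D + tanh, max-pool over the last axis, and flatten each
        # sample to a single feature vector of shape [batch_size, -1].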
        x = paddle.tanh(self._conv2d(inputs))
        x = paddle.max(x, axis=-1)
        x = paddle.reshape(x, shape=[self.batch_size, -1])
        return x


class CNN(paddle.nn.Layer):
    def __init__(self, dict_dim, batch_size, seq_len):
        super().__init__()
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.channels = 1
        self.win_size = [3, self.hid_dim]
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.dict_dim + 1,
            self.emb_dim,
            sparse=False,
        )
        self._simple_conv_pool_1 = SimpleConvPool(
            self.channels,
            self.hid_dim,
            self.win_size,
            batch_size=self.batch_size,
        )
        self._fc1 = Linear(
            self.hid_dim * self.seq_len,
            self.fc_hid_dim,
        )
        self._fc1_act = paddle.nn.Softmax()
        self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim)

    @to_static
    def forward(self, inputs, label=None):
        emb = self.embedding(inputs)
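        # Token ids equal to dict_dim mark padding; build a 0/1 mask and zero
        # out the corresponding embedding rows.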
        o_np_mask = (paddle.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
            dtype='float32'
        )
        mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])
        emb = emb * mask_emb
        emb = paddle.reshape(
            emb, shape=[-1, self.channels, self.seq_len, self.hid_dim]
        )
        conv_3 = self._simple_conv_pool_1(emb)
        fc_1 = self._fc1(conv_3)
        fc_1 = self._fc1_act(fc_1)
        prediction = self._fc_prediction(fc_1)
        prediction = self._fc1_act(prediction)
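        # prediction already holds probabilities (softmax applied above), so
        # cross_entropy below is invoked with use_softmax=False.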

        cost = paddle.nn.functional.cross_entropy(
            input=prediction, label=label, reduction='none', use_softmax=False
        )
        avg_cost = paddle.mean(x=cost)
        acc = paddle.static.accuracy(input=prediction, label=label)
        return avg_cost, prediction, acc


class BOW(paddle.nn.Layer):
    def __init__(self, dict_dim, batch_size, seq_len):
        super().__init__()
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.dict_dim + 1,
            self.emb_dim,
            sparse=False,
        )
        self._fc1 = Linear(self.hid_dim, self.hid_dim)
        self._fc2 = Linear(self.hid_dim, self.fc_hid_dim)
        self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim)

    @to_static
    def forward(self, inputs, label=None):
        emb = self.embedding(inputs)
        o_np_mask = (paddle.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
            dtype='float32'
        )
        mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])
        emb = emb * mask_emb
        emb = paddle.reshape(emb, shape=[-1, self.seq_len, self.hid_dim])
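        # Bag-of-words encoding: sum the token embeddings over the time axis.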
        bow_1 = paddle.sum(emb, axis=1)
        bow_1 = paddle.tanh(bow_1)
        fc_1 = self._fc1(bow_1)
        fc_1 = paddle.tanh(fc_1)
        fc_2 = self._fc2(fc_1)
        fc_2 = paddle.tanh(fc_2)
        prediction = self._fc_prediction(fc_2)
        prediction = paddle.nn.functional.softmax(prediction)

        cost = paddle.nn.functional.cross_entropy(
            input=prediction, label=label, reduction='none', use_softmax=False
        )
        avg_cost = paddle.mean(x=cost)
        acc = paddle.static.accuracy(input=prediction, label=label)
        return avg_cost, prediction, acc


class GRU(paddle.nn.Layer):
    def __init__(self, dict_dim, batch_size, seq_len):
        super().__init__()
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.dict_dim + 1,
            self.emb_dim,
            weight_attr=fluid.ParamAttr(learning_rate=30),
            sparse=False,
        )
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
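        # DynamicGRU (from test_lac) consumes hid_dim * 3 input features, one
        # slice per GRU gate, hence the widened projection in _fc1.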
        self._fc1 = Linear(self.hid_dim, self.hid_dim * 3)
        self._fc2 = Linear(self.hid_dim, self.fc_hid_dim)
        self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim)
        self._gru = DynamicGRU(size=self.hid_dim, h_0=h_0)

    @to_static
    def forward(self, inputs, label=None):
        emb = self.embedding(inputs)
        o_np_mask = (paddle.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
            'float32'
        )
        mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])
        emb = emb * mask_emb
        emb = paddle.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_hidden = self._gru(fc_1)
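        # Max-pool the per-step GRU outputs over the time axis.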
        gru_hidden = paddle.max(gru_hidden, axis=1)
        tanh_1 = paddle.tanh(gru_hidden)
        fc_2 = self._fc2(tanh_1)
        fc_2 = paddle.tanh(fc_2)
        prediction = self._fc_prediction(fc_2)
        prediction = paddle.nn.functional.softmax(prediction)
        cost = paddle.nn.functional.cross_entropy(
            input=prediction, label=label, reduction='none', use_softmax=False
        )
        avg_cost = paddle.mean(x=cost)
        acc = paddle.static.accuracy(input=prediction, label=label)
        return avg_cost, prediction, acc


class BiGRU(paddle.nn.Layer):
    def __init__(self, dict_dim, batch_size, seq_len):
        super().__init__()
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.dict_dim + 1,
            self.emb_dim,
            weight_attr=fluid.ParamAttr(learning_rate=30),
            sparse=False,
        )
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
        self._fc1 = Linear(self.hid_dim, self.hid_dim * 3)
        self._fc2 = Linear(self.hid_dim * 2, self.fc_hid_dim)
        self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim)
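        # Two unidirectional GRUs, one reading the sequence in reverse, emulate
        # a bidirectional GRU; concatenating them is why _fc2 takes hid_dim * 2.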
        self._gru_forward = DynamicGRU(
            size=self.hid_dim, h_0=h_0, is_reverse=False
        )
        self._gru_backward = DynamicGRU(
            size=self.hid_dim, h_0=h_0, is_reverse=True
        )

    @to_static
    def forward(self, inputs, label=None):
        emb = self.embedding(inputs)
        o_np_mask = (paddle.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
            'float32'
        )
        mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])

        emb = emb * mask_emb
        emb = paddle.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_forward = self._gru_forward(fc_1)
        gru_backward = self._gru_backward(fc_1)
        gru_forward_tanh = paddle.tanh(gru_forward)
        gru_backward_tanh = paddle.tanh(gru_backward)
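        # Concatenate both directions along the feature axis, then max-pool
        # over the time axis.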
        encoded_vector = paddle.concat(
            [gru_forward_tanh, gru_backward_tanh], axis=2
        )
        encoded_vector = paddle.max(encoded_vector, axis=1)
        fc_2 = self._fc2(encoded_vector)
        fc_2 = paddle.tanh(fc_2)
        prediction = self._fc_prediction(fc_2)
        prediction = paddle.nn.functional.softmax(prediction)
        # TODO(Aurelius84): Uncomment the following code once returning
        # variable-length variables is supported.
        # if label is not None:
        cost = paddle.nn.functional.cross_entropy(
            input=prediction, label=label, reduction='none', use_softmax=False
        )
        avg_cost = paddle.mean(x=cost)
        acc = paddle.static.accuracy(input=prediction, label=label)
        return avg_cost, prediction, acc
        # else:
        #     return prediction


def fake_data_reader(class_num, vocab_size, batch_size, padding_size):
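    # Deterministic fake reader: a fixed seed guarantees that the dygraph and
    # to_static runs in train() consume identical batches.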
    local_random = np.random.RandomState(SEED)

    def reader():
        batch_data = []
        while True:
            label = local_random.randint(0, class_num)
            seq_len = local_random.randint(
                padding_size // 2, int(padding_size * 1.2)
            )
            word_ids = local_random.randint(0, vocab_size, [seq_len]).tolist()
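            # Truncate to padding_size and pad with vocab_size, which the
            # models treat as the padding id (the `inputs != dict_dim` mask).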
            word_ids = word_ids[:padding_size] + [vocab_size] * (
                padding_size - seq_len
            )
            batch_data.append((word_ids, [label], seq_len))
            if len(batch_data) == batch_size:
                yield batch_data
                batch_data = []

    return reader


class Args:
    epoch = 1
    batch_size = 4
    class_num = 2
    lr = 0.01
    vocab_size = 1000
    padding_size = 50
    log_step = 5
    train_step = 10


def train(args, to_static):
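    # Toggles whether @to_static takes effect, so the same loop runs either as
    # a dygraph-to-static program or in plain dygraph mode.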
    paddle.jit.enable_to_static(to_static)
    place = (
        fluid.CUDAPlace(0)
        if fluid.is_compiled_with_cuda()
        else fluid.CPUPlace()
    )

    with fluid.dygraph.guard(place):
        np.random.seed(SEED)
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        train_reader = fake_data_reader(
            args.class_num, args.vocab_size, args.batch_size, args.padding_size
        )
        train_loader = fluid.io.DataLoader.from_generator(capacity=24)
        train_loader.set_sample_list_generator(train_reader)

        if args.model_type == 'cnn_net':
            model = CNN(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'bow_net':
            model = BOW(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'gru_net':
            model = GRU(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'bigru_net':
            model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
        optimizer = paddle.optimizer.Adagrad(
            learning_rate=args.lr, parameters=model.parameters()
        )

        loss_data = []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_loader()):
                word_ids, labels, seq_lens = data
                doc = to_variable(word_ids.numpy().reshape(-1)).astype('int64')
                label = labels.astype('int64')

                model.train()
                avg_cost, prediction, acc = model(doc, label)
                loss_data.append(float(avg_cost))

                avg_cost.backward()
                optimizer.minimize(avg_cost)
                model.clear_gradients()

                if batch_id % args.log_step == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    # used_time may be 0.0, causing a zero-division error
                    if used_time < 1e-5:
                        used_time = 1e-5
                    print(
                        "step: %d, ave loss: %f, speed: %f steps/s"
                        % (
                            batch_id,
                            float(avg_cost),
                            args.log_step / used_time,
                        )
                    )
                    time_begin = time.time()

                if batch_id == args.train_step:
                    break
    return loss_data


class TestSentiment(unittest.TestCase):
    def setUp(self):
        self.args = Args()

    def train_model(self, model_type='cnn_net'):
        self.args.model_type = model_type
        st_out = train(self.args, True)
        dy_out = train(self.args, False)
        np.testing.assert_allclose(
            dy_out,
            st_out,
            rtol=1e-05,
            err_msg=f'dy_out:\n {dy_out}\n st_out:\n {st_out}',
        )

    def test_train(self):
        model_types = ['cnn_net', 'bow_net', 'gru_net', 'bigru_net']
        for model_type in model_types:
            print('training %s ...' % model_type)
            self.train_model(model_type)


if __name__ == '__main__':
    unittest.main()