test_lac.py 23.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
16 17
import os
import tempfile
18 19 20
import time
import unittest

21
import numpy as np
22

23 24
# Expose only CUDA device index 2 to this process.
# NOTE(review): this assignment sits before the `paddle` import below,
# presumably so the framework sees it when it initializes — confirm, and
# confirm that the hard-coded device index is valid on the target CI machines.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

L
Leo Chen 已提交
25
import paddle
26
import paddle.fluid as fluid
27
from paddle import _legacy_C_ops
28
from paddle.fluid.dygraph import Embedding, GRUUnit, to_variable
29
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
30
from paddle.fluid.framework import _non_static_mode
31 32
from paddle.jit import ProgramTranslator
from paddle.jit.api import declarative
33 34 35 36

# Seed used for the random input generator and for the Paddle RNG setup in
# `TestLACModel.train`.
SEED = 2020

# Translator handle; the test calls `program_translator.enable(...)` to switch
# dygraph-to-static conversion on and off.
program_translator = ProgramTranslator()

# Add InputSpec to make unittest run faster.
# The three specs are passed to `@declarative` on `LexNet.forward`, whose
# arguments are `word`, `target` and `length`, in that order.
input_specs = [
    paddle.static.InputSpec([None, None], 'int64'),
    paddle.static.InputSpec([None, None], 'int64'),
    paddle.static.InputSpec([None], 'int64'),
]
43 44 45


class DynamicGRU(fluid.dygraph.Layer):
    """GRU unrolled step by step along axis 1 using a single ``GRUUnit``.

    Args:
        size: Hidden size; the wrapped ``GRUUnit`` is built with ``size * 3``.
        h_0: Initial hidden state. It is passed to ``to_variable`` on every
            forward call, so it must be something ``to_variable`` accepts.
        param_attr: Forwarded to ``GRUUnit`` as ``param_attr``.
        bias_attr: Forwarded to ``GRUUnit`` as ``bias_attr``.
        is_reverse: If true, steps are consumed from the last index of axis 1
            to the first, and the outputs are put back in input order.
        gate_activation: Forwarded to ``GRUUnit`` as ``gate_activation``.
        candidate_activation: Forwarded to ``GRUUnit`` as ``activation``.
        origin_mode: Forwarded to ``GRUUnit`` as ``origin_mode``.
        init_size: Accepted but not used by this implementation.
    """

    def __init__(
        self,
        size,
        h_0=None,
        param_attr=None,
        bias_attr=None,
        is_reverse=False,
        gate_activation='sigmoid',
        candidate_activation='tanh',
        origin_mode=False,
        init_size=None,
    ):
        super().__init__()

        self.gru_unit = GRUUnit(
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode,
        )

        self.size = size
        self.h_0 = h_0
        self.is_reverse = is_reverse

    def forward(self, inputs):
        """Run the GRU over every step of axis 1 and concatenate the states.

        Args:
            inputs: Tensor indexed as ``[batch, step, feature]`` by the
                slicing and reshaping below.

        Returns:
            The per-step hidden states concatenated along axis 1.
        """
        # `h_0` is created outside this layer and is shared with other
        # `DynamicGRU` instances, so work on a copy made by `to_variable`
        # instead of modifying the shared tensor.
        hidden = to_variable(self.h_0)
        hidden.stop_gradient = True

        res = []
        for i in range(inputs.shape[1]):
            if self.is_reverse:
                j = paddle.shape(inputs)[1] - 1 - i
            else:
                j = i

            # Equivalent to `inputs[:, j:j+1, :]`, written with explicit ops.
            input_ = paddle.slice(inputs, axes=[1], starts=[j], ends=[j + 1])
            input_ = paddle.reshape(input_, [-1, input_.shape[2]])
            hidden, reset, gate = self.gru_unit(input_, hidden)
            hidden_ = paddle.reshape(hidden, [-1, 1, hidden.shape[1]])
            res.append(hidden_)

        if self.is_reverse:
            # States were produced last-step-first; restore input order.
            res = res[::-1]
        res = fluid.layers.concat(res, axis=1)
        return res


class BiGRU(fluid.dygraph.Layer):
    """Bidirectional GRU made of a forward and a reverse ``DynamicGRU``.

    Each direction has its own ``Linear`` projection from ``input_dim`` to
    ``grnn_hidden_dim * 3`` followed by a ``DynamicGRU`` of size
    ``grnn_hidden_dim``.

    Args:
        input_dim: Feature size of the input.
        grnn_hidden_dim: Hidden size of each direction.
        init_bound: Weights are initialized uniformly in
            ``[-init_bound, init_bound]``.
        h_0: Initial hidden state handed to both ``DynamicGRU`` layers.
    """

    def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
        super().__init__()

        def make_weight_attr():
            # Build a fresh attribute object for every parameter, exactly as
            # if it had been spelled out at each call site.
            return fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound
                ),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4
                ),
            )

        self.pre_gru = paddle.nn.Linear(
            in_features=input_dim,
            out_features=grnn_hidden_dim * 3,
            weight_attr=make_weight_attr(),
        )

        self.gru = DynamicGRU(
            size=grnn_hidden_dim,
            h_0=h_0,
            param_attr=make_weight_attr(),
        )

        self.pre_gru_r = paddle.nn.Linear(
            in_features=input_dim,
            out_features=grnn_hidden_dim * 3,
            weight_attr=make_weight_attr(),
        )

        self.gru_r = DynamicGRU(
            size=grnn_hidden_dim,
            is_reverse=True,
            h_0=h_0,
            param_attr=make_weight_attr(),
        )

    def forward(self, input_feature):
        """Return forward and reverse GRU outputs concatenated on the last axis."""
        res_pre_gru = self.pre_gru(input_feature)
        res_gru = self.gru(res_pre_gru)

        res_pre_gru_r = self.pre_gru_r(input_feature)
        res_gru_r = self.gru_r(res_pre_gru_r)

        bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1)
        return bi_merge


class LinearChainCRF(fluid.dygraph.Layer):
    """Layer wrapper around the ``linear_chain_crf`` operator.

    Args:
        param_attr: Attribute used to create the transition parameter.
        size: Number of tags; the transition parameter has shape
            ``[size + 2, size]``.
        is_test: Passed to the operator as its ``is_test`` attribute.
        dtype: Data type of the transition parameter and of the operator's
            output variables.
    """

    def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
        super().__init__()

        self._param_attr = param_attr
        self._dtype = dtype
        self._size = size
        self._is_test = is_test
        self._transition = self.create_parameter(
            attr=self._param_attr,
            shape=[self._size + 2, self._size],
            dtype=self._dtype,
        )

    @property
    def weight(self):
        """The transition parameter."""
        return self._transition

    @weight.setter
    def weight(self, value):
        self._transition = value

    def forward(self, input, label, length=None):
        """Apply ``linear_chain_crf`` and return its ``LogLikelihood`` output.

        Args:
            input: Emission tensor.
            label: Label tensor.
            length: Optional sequence-length tensor.
        """
        if _non_static_mode():
            # Dygraph path: call the operator directly and keep only the
            # fourth output.
            _, _, _, log_likelihood = _legacy_C_ops.linear_chain_crf(
                input, self._transition, label, length, "is_test", self._is_test
            )
            return log_likelihood

        # Static-graph path: create the output variables and append the op.
        alpha = self._helper.create_variable_for_type_inference(
            dtype=self._dtype
        )
        emission_exps = self._helper.create_variable_for_type_inference(
            dtype=self._dtype
        )
        transition_exps = self._helper.create_variable_for_type_inference(
            dtype=self._dtype
        )
        log_likelihood = self._helper.create_variable_for_type_inference(
            dtype=self._dtype
        )
        this_inputs = {
            "Emission": [input],
            "Transition": self._transition,
            "Label": [label],
        }
        if length is not None:
            this_inputs['Length'] = [length]
        self._helper.append_op(
            type='linear_chain_crf',
            inputs=this_inputs,
            outputs={
                "Alpha": [alpha],
                "EmissionExps": [emission_exps],
                "TransitionExps": transition_exps,
                "LogLikelihood": log_likelihood,
            },
            attrs={
                "is_test": self._is_test,
            },
        )
        return log_likelihood


class CRFDecoding(fluid.dygraph.Layer):
    """Layer wrapper around the ``crf_decoding`` operator.

    Args:
        param_attr: Attribute used to create the transition parameter.
        size: Number of tags; the transition parameter has shape
            ``[size + 2, size]``.
        is_test: Passed to the operator as its ``is_test`` attribute.
        dtype: Data type of the transition parameter and of the output
            variable created on the static-graph path.
    """

    def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
        super().__init__()

        self._dtype = dtype
        self._size = size
        self._is_test = is_test
        self._param_attr = param_attr
        self._transition = self.create_parameter(
            attr=self._param_attr,
            shape=[self._size + 2, self._size],
            dtype=self._dtype,
        )

    @property
    def weight(self):
        """The transition parameter."""
        return self._transition

    @weight.setter
    def weight(self, value):
        self._transition = value

    def forward(self, input, label=None, length=None):
        """Apply ``crf_decoding`` and return its ``ViterbiPath`` output.

        Args:
            input: Emission tensor.
            label: Optional label tensor, handed to the operator as ``Label``.
            length: Optional sequence-length tensor.
        """
        if _non_static_mode():
            # Dygraph path: call the operator directly.
            return _legacy_C_ops.crf_decoding(
                input, self._transition, label, length, "is_test", self._is_test
            )

        # Static-graph path: create the output variable and append the op.
        viterbi_path = self._helper.create_variable_for_type_inference(
            dtype=self._dtype
        )
        this_inputs = {
            "Emission": [input],
            "Transition": self._transition,
            "Label": label,
        }
        if length is not None:
            this_inputs['Length'] = [length]
        self._helper.append_op(
            type='crf_decoding',
            inputs=this_inputs,
            outputs={"ViterbiPath": [viterbi_path]},
            attrs={
                "is_test": self._is_test,
            },
        )
        return viterbi_path


class ChunkEval(fluid.dygraph.Layer):
    """Layer wrapper around the ``chunk_eval`` operator.

    Args:
        num_chunk_types: Passed to the operator as ``num_chunk_types``.
        chunk_scheme: Passed to the operator as ``chunk_scheme``.
        excluded_chunk_types: Passed to the operator as
            ``excluded_chunk_types``; ``None`` is sent as an empty list.
    """

    def __init__(
        self, num_chunk_types, chunk_scheme, excluded_chunk_types=None
    ):
        super().__init__()
        self.num_chunk_types = num_chunk_types
        self.chunk_scheme = chunk_scheme
        self.excluded_chunk_types = excluded_chunk_types

    def forward(self, input, label, seq_length=None):
        """Apply ``chunk_eval`` to predictions and labels.

        Args:
            input: Predicted tags, handed to the operator as ``Inference``.
            label: Reference tags.
            seq_length: Optional sequence-length tensor.

        Returns:
            On the static-graph path, the tuple ``(precision, recall,
            f1_score, num_infer_chunks, num_label_chunks,
            num_correct_chunks)``. On the dygraph path, whatever
            ``_legacy_C_ops.chunk_eval`` returns.
        """
        if _non_static_mode():
            return _legacy_C_ops.chunk_eval(
                input,
                label,
                seq_length,
                "num_chunk_types",
                self.num_chunk_types,
                "chunk_scheme",
                self.chunk_scheme,
                "excluded_chunk_types",
                self.excluded_chunk_types or [],
            )

        # Static-graph path: three float32 scores followed by three int64
        # chunk counters, created in the order the op lists them.
        precision = self._helper.create_variable_for_type_inference(
            dtype="float32"
        )
        recall = self._helper.create_variable_for_type_inference(
            dtype="float32"
        )
        f1_score = self._helper.create_variable_for_type_inference(
            dtype="float32"
        )
        num_infer_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64"
        )
        num_label_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64"
        )
        num_correct_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64"
        )

        this_input = {"Inference": [input], "Label": [label]}
        if seq_length is not None:
            this_input["SeqLength"] = [seq_length]

        self._helper.append_op(
            type='chunk_eval',
            inputs=this_input,
            outputs={
                "Precision": [precision],
                "Recall": [recall],
                "F1-Score": [f1_score],
                "NumInferChunks": [num_infer_chunks],
                "NumLabelChunks": [num_label_chunks],
                "NumCorrectChunks": [num_correct_chunks],
            },
            attrs={
                "num_chunk_types": self.num_chunk_types,
                "chunk_scheme": self.chunk_scheme,
                "excluded_chunk_types": self.excluded_chunk_types or [],
            },
        )
        return (
            precision,
            recall,
            f1_score,
            num_infer_chunks,
            num_label_chunks,
            num_correct_chunks,
        )
351 352 353 354


class LexNet(fluid.dygraph.Layer):
    """Lexical analysis network: embedding, stacked BiGRUs, linear layer, CRF.

    Args:
        args: Configuration object. Reads ``word_emb_dim``, ``vocab_size``,
            ``num_labels``, ``grnn_hidden_dim``, ``bigru_num`` and
            ``batch_size``; ``emb_learning_rate`` and ``crf_learning_rate``
            are optional and default to 1.0.
        length: Accepted but not used by the constructor.
    """

    def __init__(self, args, length=None):
        super().__init__()
        self.word_emb_dim = args.word_emb_dim
        self.vocab_size = args.vocab_size
        self.num_labels = args.num_labels
        self.grnn_hidden_dim = args.grnn_hidden_dim
        self.emb_lr = getattr(args, 'emb_learning_rate', 1.0)
        # Read the CRF learning rate from its own attribute; the previous code
        # returned `args.emb_learning_rate` here.
        self.crf_lr = getattr(args, 'crf_learning_rate', 1.0)
        self.bigru_num = args.bigru_num
        self.init_bound = 0.1

        self.word_embedding = Embedding(
            size=[self.vocab_size, self.word_emb_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(
                learning_rate=self.emb_lr,
                name="word_emb",
                initializer=fluid.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound
                ),
            ),
        )

        # One zero initial state, sized by the configured batch size, shared
        # by every GRU in the stack.
        h_0 = np.zeros((args.batch_size, self.grnn_hidden_dim), dtype="float32")
        h_0 = to_variable(h_0)

        self.bigru_units = []
        for i in range(self.bigru_num):
            # The first BiGRU consumes the embedding output (width
            # `word_emb_dim`); later ones consume the previous BiGRU's
            # concatenated output (width `grnn_hidden_dim * 2`).
            if i == 0:
                input_dim = self.word_emb_dim
            else:
                input_dim = self.grnn_hidden_dim * 2
            self.bigru_units.append(
                self.add_sublayer(
                    "bigru_units%d" % i,
                    BiGRU(
                        input_dim,
                        self.grnn_hidden_dim,
                        self.init_bound,
                        h_0=h_0,
                    ),
                )
            )

        self.fc = paddle.nn.Linear(
            in_features=self.grnn_hidden_dim * 2,
            out_features=self.num_labels,
            weight_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound
                ),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4
                ),
            ),
        )

        self.linear_chain_crf = LinearChainCRF(
            param_attr=fluid.ParamAttr(
                name='linear_chain_crfw', learning_rate=self.crf_lr
            ),
            size=self.num_labels,
        )

        self.crf_decoding = CRFDecoding(
            param_attr=fluid.ParamAttr(name='crfw', learning_rate=self.crf_lr),
            size=self.num_labels,
        )
        # share weight
        self.crf_decoding.weight = self.linear_chain_crf.weight

    @declarative(input_spec=input_specs)
    def forward(self, word, target, length=None):
        """Compute the mean CRF cost and the decoded tag path.

        Args:
            word: Word ids fed to the embedding.
            target: Label ids used by the CRF cost.
            length: Optional sequence lengths passed to both CRF layers.

        Returns:
            ``(avg_cost, crf_decode)``.
        """
        word_embed = self.word_embedding(word)
        input_feature = word_embed

        for i in range(self.bigru_num):
            bigru_output = self.bigru_units[i](input_feature)
            input_feature = bigru_output

        emission = self.fc(bigru_output)

        crf_cost = self.linear_chain_crf(
            input=emission, label=target, length=length
        )
        avg_cost = paddle.mean(x=crf_cost)
        crf_decode = self.crf_decoding(input=emission, length=length)
        return avg_cost, crf_decode


469
class Args:
    """Hyper-parameters used by the LAC test.

    Plain class attributes are used on purpose: `LexNet` probes the object
    for optional attributes such as `emb_learning_rate`, which are
    deliberately absent here.
    """

    # Training loop
    epoch = 1
    batch_size = 4
    base_learning_rate = 0.01
    print_steps = 1

    # Data and model sizes
    vocab_size = 100
    num_labels = 10
    word_emb_dim = 128
    grnn_hidden_dim = 128
    bigru_num = 2


def get_random_input_data(
    batch_size, vocab_size, num_labels, max_seq_len=64, seed=None
):
    """Build a reader that yields batches of random, zero-padded sequences.

    Args:
        batch_size: Number of samples per yielded batch.
        vocab_size: Word ids are drawn from ``[0, vocab_size)``.
        num_labels: Label ids are drawn from ``[0, num_labels)``.
        max_seq_len: Sequence lengths are drawn from ``[3, max_seq_len)``.
        seed: Seed for the private ``RandomState``; ``None`` (the default)
            uses the module-level ``SEED``.

    Returns:
        A zero-argument generator function. Each call yields 5 batches; a
        batch is a list of ``(word_ids, label_ids, words_len)`` tuples in
        which both id lists are padded with 0 to the longest sequence of the
        batch and ``words_len`` is the unpadded length as ``np.int64``.
        The random state is created once here, so repeated calls of the
        returned reader continue the same random stream.
    """
    local_random = np.random.RandomState(SEED if seed is None else seed)
    padding_id = np.int64(0)
    iter_num = 5

    def _random_ids(upper, length):
        # `length` ids in [0, upper) as a plain Python list of ints.
        return local_random.randint(0, upper, [length]).astype('int64').tolist()

    def _pad(ids, width):
        # Right-pad `ids` (already at most `width` long) with the padding id.
        return ids + [padding_id] * (width - len(ids))

    def __reader__():
        batch = []
        for _ in range(iter_num * batch_size):
            # Draw order (length, words, labels) fixes the generated data.
            cur_len = local_random.randint(3, max_seq_len)
            word_ids = _random_ids(vocab_size, cur_len)
            label_ids = _random_ids(num_labels, cur_len)
            batch.append((word_ids, label_ids))

            if len(batch) == batch_size:
                longest = max(len(words) for words, _ in batch)
                batch_max_len = min(longest, max_seq_len)
                new_batch = []
                for word_ids, label_ids in batch:
                    word_ids = word_ids[:batch_max_len]
                    label_ids = label_ids[:batch_max_len]
                    words_len = np.int64(len(word_ids))
                    new_batch.append(
                        (
                            _pad(word_ids, batch_max_len),
                            _pad(label_ids, batch_max_len),
                            words_len,
                        )
                    )
                yield new_batch
                batch = []

    return __reader__


def create_dataloader(reader, place):
    """Wrap a sample-list reader in an iterable ``fluid.io.DataLoader``.

    Args:
        reader: Generator function yielding lists of samples.
        place: Place (or places) passed to ``set_sample_list_generator``.

    Returns:
        The configured data loader.
    """
    data_loader = fluid.io.DataLoader.from_generator(
        capacity=16, use_double_buffer=True, iterable=True
    )

    data_loader.set_sample_list_generator(reader, places=place)

    return data_loader


class TestLACModel(unittest.TestCase):
    """Check that `LexNet` trains to the same losses in dygraph and static mode."""

    def setUp(self):
        self.args = Args()
        self.place = (
            fluid.CUDAPlace(0)
            if fluid.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        self.temp_dir = tempfile.TemporaryDirectory()
        self.model_save_dir = os.path.join(self.temp_dir.name, 'inference')
        self.model_save_prefix = os.path.join(self.model_save_dir, 'lac')
        self.model_filename = "lac" + INFER_MODEL_SUFFIX
        self.params_filename = "lac" + INFER_PARAMS_SUFFIX
        self.dy_param_path = os.path.join(self.temp_dir.name, 'lac_dy_param')

    def tearDown(self):
        # Remove the saved models and parameters written during the test.
        self.temp_dir.cleanup()

    def train(self, args, to_static):
        """Train `LexNet` on the random data and return the per-step losses.

        Args:
            args: Hyper-parameter object (see `Args`).
            to_static: Whether dygraph-to-static conversion is enabled.

        Returns:
            ``np.ndarray`` with one loss value per training step.

        Side effects:
            Saves an inference model under ``self.model_save_prefix`` when
            ``to_static`` is true, otherwise saves the dygraph state dict
            under ``self.dy_param_path``.
        """
        program_translator.enable(to_static)
        with fluid.dygraph.guard(self.place):
            paddle.seed(SEED)
            paddle.framework.random._manual_program_seed(SEED)

            reader = get_random_input_data(
                args.batch_size, args.vocab_size, args.num_labels
            )
            train_loader = create_dataloader(reader, self.place)

            model = LexNet(args)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=args.base_learning_rate,
                parameter_list=model.parameters(),
            )
            chunk_eval = ChunkEval(
                int(math.ceil((args.num_labels - 1) / 2.0)), "IOB"
            )

            step = 0
            loss_data = []
            for _ in range(args.epoch):
                for batch in train_loader():
                    words, targets, length = batch
                    start_time = time.time()
                    avg_cost, crf_decode = model(words, targets, length)
                    loss_data.append(avg_cost.numpy()[0])

                    # backward and optimization
                    avg_cost.backward()
                    optimizer.minimize(avg_cost)
                    model.clear_gradients()
                    end_time = time.time()

                    if step % args.print_steps == 0:
                        # Only the three scores are reported; the chunk
                        # counters returned alongside them are ignored.
                        (
                            precision,
                            recall,
                            f1_score,
                            _,
                            _,
                            _,
                        ) = chunk_eval(
                            input=crf_decode, label=targets, seq_length=length
                        )
                        mean_cost, mean_p, mean_r, mean_f1 = [
                            np.mean(x.numpy())
                            for x in (avg_cost, precision, recall, f1_score)
                        ]

                        print(
                            "[train] step = %d, loss = %f, P: %f, R: %f, F1: %f, elapsed time %f"
                            % (
                                step,
                                mean_cost,
                                mean_p,
                                mean_r,
                                mean_f1,
                                end_time - start_time,
                            )
                        )

                    step += 1

            # save inference model
            if to_static:
                # Only `word` and `length` are inputs of the saved model.
                paddle.jit.save(
                    layer=model,
                    path=self.model_save_prefix,
                    input_spec=[input_specs[0], input_specs[-1]],
                    output_spec=[crf_decode],
                )
            else:
                fluid.dygraph.save_dygraph(
                    model.state_dict(), self.dy_param_path
                )

            return np.array(loss_data)

    def test_train(self):
        st_out = self.train(self.args, to_static=True)
        dy_out = self.train(self.args, to_static=False)
        np.testing.assert_allclose(
            dy_out,
            st_out,
            rtol=1e-05,
            err_msg='dygraph output:\n{},\nstatic output:\n {}.'.format(
                dy_out, st_out
            ),
        )
        # Prediction needs the models saved by the two `train` calls above,
        # so it would have to run here, after training.
        # NOTE: the prediction check is currently disabled.
        # self.verify_predict()

    def verify_predict(self):
        """Compare dygraph, static-executor and jit-loaded predictions."""
        reader = get_random_input_data(
            self.args.batch_size, self.args.vocab_size, self.args.num_labels
        )
        for batch in reader():
            # Turn the list of per-sample tuples into one stacked array per
            # field (words, targets, lengths).
            batch = [np.vstack(var) for var in zip(*batch)]
            dy_pre = self.predict_dygraph(batch)
            st_pre = self.predict_static(batch)
            dy_jit_pre = self.predict_dygraph_jit(batch)
            np.testing.assert_allclose(dy_pre, st_pre, rtol=1e-05)
            np.testing.assert_allclose(dy_jit_pre, st_pre, rtol=1e-05)

    def predict_dygraph(self, batch):
        """Predict with a fresh `LexNet` loaded from the saved dygraph params."""
        words, targets, length = batch
        program_translator.enable(False)
        with fluid.dygraph.guard(self.place):
            model = LexNet(self.args)
            # load dygraph trained parameters
            model_dict, _ = fluid.load_dygraph(self.dy_param_path + ".pdparams")
            model.set_dict(model_dict)
            model.eval()

            _, pred_res = model(
                to_variable(words), to_variable(targets), to_variable(length)
            )

            return pred_res.numpy()

    def predict_static(self, batch):
        """
        LAC model contains h_0 created in `__init__` that is necessary for inferring.
        Load the saved inference model to check that it works for prediction.
        """
        paddle.enable_static()
        exe = fluid.Executor(self.place)
        # load inference model
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(
            self.model_save_dir,
            executor=exe,
            model_filename=self.model_filename,
            params_filename=self.params_filename,
        )

        words, targets, length = batch
        pred_res = exe.run(
            inference_program,
            feed={feed_target_names[0]: words, feed_target_names[1]: length},
            fetch_list=fetch_targets,
        )
        return pred_res[0]

    def predict_dygraph_jit(self, batch):
        """Predict with the model loaded back through `paddle.jit.load`."""
        words, targets, length = batch
        with fluid.dygraph.guard(self.place):
            model = paddle.jit.load(self.model_save_prefix)
            model.eval()

            pred_res = model(to_variable(words), to_variable(length))

            return pred_res.numpy()

714 715

if __name__ == "__main__":
    # Run the suite inside the `fluid.framework._test_eager_guard()` context.
    with fluid.framework._test_eager_guard():
        unittest.main()