# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import unittest

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.nn as nn
from paddle import Model, set_device
from paddle.fluid.dygraph import Layer
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import BeamSearchDecoder, dynamic_decode
from paddle.static import InputSpec as Input

paddle.enable_static()


class PolicyGradient:
    """policy gradient"""

    def __init__(self, lr=None):
        self.lr = lr

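    # REINFORCE-style learner: scales the negative log-likelihood of each
    # sampled action by its reward and minimizes the result with Adam.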
    def learn(self, act_prob, action, reward, length=None):
        """
        update the policy model with the policy gradient algorithm
        """
        self.reward = paddle.static.py_func(
            func=reward_func, x=[action, length], out=reward
        )
        neg_log_prob = paddle.nn.functional.cross_entropy(
            act_prob, action, reduction='none', use_softmax=False
        )
        cost = neg_log_prob * reward
        cost = (
            (paddle.sum(cost) / paddle.sum(length))
            if length is not None
            else paddle.mean(cost)
        )
        optimizer = fluid.optimizer.Adam(self.lr)
        optimizer.minimize(cost)
        return cost


def reward_func(samples, sample_length):
    """toy reward"""

    def discount_reward(reward, sequence_length, discount=1.0):
        return discount_reward_1d(reward, sequence_length, discount)

    def discount_reward_1d(reward, sequence_length, discount=1.0, dtype=None):
        if sequence_length is None:
            raise ValueError(
                'sequence_length must not be `None` for 1D reward.'
            )
        reward = np.array(reward)
        sequence_length = np.array(sequence_length)
        batch_size = reward.shape[0]
        max_seq_length = np.max(sequence_length)
        dtype = dtype or reward.dtype
        if discount == 1.0:
            dmat = np.ones([batch_size, max_seq_length], dtype=dtype)
        else:
            steps = np.tile(np.arange(max_seq_length), [batch_size, 1])
            mask = np.asarray(
                steps < (sequence_length - 1)[:, None], dtype=dtype
            )
            # Make each row = [discount, ..., discount, 1, ..., 1]
            dmat = mask * discount + (1 - mask)
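            # the reversed cumulative product turns each row entry into
            # discount^(number of remaining valid steps)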
            dmat = np.cumprod(dmat[:, ::-1], axis=1)[:, ::-1]
        disc_reward = dmat * reward[:, None]
        disc_reward = mask_sequences(disc_reward, sequence_length, dtype=dtype)
        return disc_reward

    def mask_sequences(sequence, sequence_length, dtype=None, time_major=False):
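        # zero out all positions at or beyond each sequence's length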
        sequence = np.array(sequence)
        sequence_length = np.array(sequence_length)
        rank = sequence.ndim
        if rank < 2:
            raise ValueError("`sequence` must be 2D or higher order.")
        batch_size = sequence.shape[0]
        max_time = sequence.shape[1]
        dtype = dtype or sequence.dtype
        if time_major:
            sequence = np.transpose(sequence, axes=[1, 0, 2])
        steps = np.tile(np.arange(max_time), [batch_size, 1])
        mask = np.asarray(steps < sequence_length[:, None], dtype=dtype)
        for _ in range(2, rank):
            mask = np.expand_dims(mask, -1)
        sequence = sequence * mask
        if time_major:
            sequence = np.transpose(sequence, axes=[1, 0, 2])
        return sequence

    samples = np.array(samples)
    sample_length = np.array(sample_length)
    # length reward: maximal when the sample length equals 5
    reward = (5 - np.abs(sample_length - 5)).astype("float32")
    # penalize repetition, which can trap training in a local minimum where
    # all sampled words are the same; beam search returning more than one
    # sample may also avoid this
    for i in range(reward.shape[0]):
        reward[i] += (
            -10
            if sample_length[i] > 1
            and np.all(samples[i][: sample_length[i] - 1] == samples[i][0])
            else 0
        )
    return discount_reward(reward, sample_length, discount=1.0).astype(
        "float32"
    )


class MLE:
    """teacher-forcing MLE training"""

    def __init__(self, lr=None):
        self.lr = lr

    def learn(self, probs, label, weight=None, length=None):
        loss = paddle.nn.functional.cross_entropy(
            input=probs,
            label=label,
            soft_label=False,
            reduction='none',
            use_softmax=False,
        )
        max_seq_len = paddle.shape(probs)[1]
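        # mask out loss contributions beyond each target sequence's length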
        mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32")
        loss = loss * mask
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)
        optimizer = fluid.optimizer.Adam(self.lr)
        optimizer.minimize(loss)
        return loss


class SeqPGAgent:
    def __init__(
        self,
        model_cls,
        alg_cls=PolicyGradient,
        model_hparams={},
        alg_hparams={},
        executor=None,
        main_program=None,
        startup_program=None,
        seed=None,
    ):
        self.main_program = (
            fluid.Program() if main_program is None else main_program
        )
        self.startup_program = (
            fluid.Program() if startup_program is None else startup_program
        )
        if seed is not None:
            self.main_program.random_seed = seed
            self.startup_program.random_seed = seed
        self.build_program(model_cls, alg_cls, model_hparams, alg_hparams)
        self.executor = executor

    def build_program(self, model_cls, alg_cls, model_hparams, alg_hparams):
        with fluid.program_guard(self.main_program, self.startup_program):
            source = fluid.data(name="src", shape=[None, None], dtype="int64")
            source_length = fluid.data(
                name="src_sequence_length", shape=[None], dtype="int64"
            )
            # only for teacher-forcing MLE training
            target = fluid.data(name="trg", shape=[None, None], dtype="int64")
            target_length = fluid.data(
                name="trg_sequence_length", shape=[None], dtype="int64"
            )
            label = fluid.data(
                name="label", shape=[None, None, 1], dtype="int64"
            )
            self.model = model_cls(**model_hparams)
            self.alg = alg_cls(**alg_hparams)
            self.probs, self.samples, self.sample_length = self.model(
                source, source_length, target, target_length
            )
            self.samples.stop_gradient = True
            self.reward = fluid.data(
                name="reward",
                shape=[None, None],  # batch_size, seq_len
                dtype=self.probs.dtype,
            )
            self.samples.stop_gradient = False
            self.cost = self.alg.learn(
                self.probs, self.samples, self.reward, self.sample_length
            )

        # prune the main program down to a prediction program that shares
        # the same parameters
        self.pred_program = self.main_program._prune_with_input(
            [source.name, source_length.name],
            [self.probs, self.samples, self.sample_length],
        )

    def predict(self, feed_dict):
        samples, sample_length = self.executor.run(
            self.pred_program,
            feed=feed_dict,
            fetch_list=[self.samples, self.sample_length],
        )
        return samples, sample_length

    def learn(self, feed_dict, fetch_list):
        results = self.executor.run(
            self.main_program, feed=feed_dict, fetch_list=fetch_list
        )
        return results


class ModuleApiTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._np_rand_state = np.random.get_state()
        cls._py_rand_state = random.getstate()
        cls._random_seed = 123
        np.random.seed(cls._random_seed)
        random.seed(cls._random_seed)

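        # dynamically build a Layer subclass whose __init__ and forward come
        # from the test case's model_init and model_forward hooks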
        cls.model_cls = type(
            cls.__name__ + "Model",
            (Layer,),
            {
                "__init__": cls.model_init_wrapper(cls.model_init),
                "forward": cls.model_forward,
            },
        )

    @classmethod
    def tearDownClass(cls):
        np.random.set_state(cls._np_rand_state)
        random.setstate(cls._py_rand_state)

    @staticmethod
    def model_init_wrapper(func):
        def __impl__(self, *args, **kwargs):
            Layer.__init__(self)
            func(self, *args, **kwargs)

        return __impl__

    @staticmethod
    def model_init(model, *args, **kwargs):
        raise NotImplementedError(
            "model_init acts as `Model.__init__`, "
            "so subclasses must implement it"
        )

    @staticmethod
    def model_forward(model, *args, **kwargs):
        return model.module(*args, **kwargs)

    def make_inputs(self):
        # TODO(guosheng): add default from `self.inputs`
        raise NotImplementedError(
            "make_inputs builds the inputs for the model, "
            "so subclasses must implement it"
        )

    def setUp(self):
        """
        For the model wrapping the module to be tested:
            set input data via the `self.inputs` list,
            set init argument values via `self.attrs` (list or dict),
            set model parameter values via the `self.param_states` dict,
            set expected output data via the `self.outputs` list.
        With these, a model instance can be created and run once.
        """
        self.inputs = []
        self.attrs = {}
        self.param_states = {}
        self.outputs = []

    def _calc_output(self, place, mode="test", dygraph=True):
        if dygraph:
            fluid.enable_dygraph(place)
        else:
            fluid.disable_dygraph()
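        # seed global and per-program RNGs so the dygraph and static-graph
        # runs produce comparable results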
        paddle.seed(self._random_seed)
        paddle.framework.random._manual_program_seed(self._random_seed)
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            layer = (
                self.model_cls(**self.attrs)
                if isinstance(self.attrs, dict)
                else self.model_cls(*self.attrs)
            )
            model = Model(layer, inputs=self.make_inputs())
            model.prepare()
            if self.param_states:
                model.load(self.param_states, optim_state=None)
            return model.predict_batch(self.inputs)

    def check_output_with_place(self, place, mode="test"):
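        # run the same model in dygraph and static-graph modes and check
        # that their outputs match (and match expected outputs, if any)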
        dygraph_output = self._calc_output(place, mode, dygraph=True)
        stgraph_output = self._calc_output(place, mode, dygraph=False)
        expect_output = getattr(self, "outputs", None)
        for actual_t, expect_t in zip(dygraph_output, stgraph_output):
            np.testing.assert_allclose(actual_t, expect_t, rtol=1e-05, atol=0)
        if expect_output:
            for actual_t, expect_t in zip(dygraph_output, expect_output):
                np.testing.assert_allclose(
                    actual_t, expect_t, rtol=1e-05, atol=0
                )

    def check_output(self):
        devices = ["CPU", "GPU"] if fluid.is_compiled_with_cuda() else ["CPU"]
        for device in devices:
            place = set_device(device)
            self.check_output_with_place(place)


class TestBeamSearch(ModuleApiTest):
    def setUp(self):
        paddle.set_default_dtype("float64")
        shape = (8, 32)
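        # random initial hidden and cell states for the LSTM cell,
        # shaped [batch_size, hidden_size]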
        self.inputs = [
            np.random.random(shape).astype("float64"),
            np.random.random(shape).astype("float64"),
        ]
        self.outputs = None
        self.attrs = {
            "vocab_size": 100,
            "embed_dim": 32,
            "hidden_size": 32,
        }
        self.param_states = {}

    @staticmethod
    def model_init(
        self,
        vocab_size,
        embed_dim,
        hidden_size,
        bos_id=0,
        eos_id=1,
        beam_size=4,
        max_step_num=20,
    ):
        embedder = paddle.nn.Embedding(vocab_size, embed_dim)
        output_layer = nn.Linear(hidden_size, vocab_size)
        cell = nn.LSTMCell(embed_dim, hidden_size)
        self.max_step_num = max_step_num
        self.beam_search_decoder = BeamSearchDecoder(
            cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=embedder,
            output_fn=output_layer,
        )

    @staticmethod
    def model_forward(model, init_hidden, init_cell):
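        # dynamic_decode steps the beam search decoder until all beams
        # finish or max_step_num is reached; [0] keeps the decoded token ids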
        return dynamic_decode(
            model.beam_search_decoder,
            [init_hidden, init_cell],
            max_step_num=model.max_step_num,
            impute_finished=True,
            is_test=True,
        )[0]

    def make_inputs(self):
        inputs = [
            Input([None, self.inputs[0].shape[-1]], "float64", "init_hidden"),
            Input([None, self.inputs[1].shape[-1]], "float64", "init_cell"),
        ]
        return inputs

    def func_check_output(self):
        self.setUp()
        self.make_inputs()
        self.check_output()

    def test_check_output(self):
        with _test_eager_guard():
            self.func_check_output()
        self.func_check_output()


if __name__ == '__main__':
    unittest.main()