# model.py: network definition for a multi-turn response-matching model
# (Deep Attention Matching style) built on PaddlePaddle Fluid.
import six
import numpy as np
import paddle.fluid as fluid
import utils.layers as layers


class Net(object):
    """Multi-turn dialogue response matching network.

    Builds a graph that embeds every context turn and the candidate
    response, applies `stack_num` self-attention blocks to each, computes
    cross-attention between every turn and the response at every stack
    depth, stacks the resulting similarity maps into a 4D/5D tensor, and
    scores it with a 3D CNN (`layers.cnn_3d`) followed by `layers.loss`.

    Args:
        max_turn_num: number of context turns fed to the model.
        max_turn_len: padded token length of each turn / the response.
        vocab_size: vocabulary size (embedding table has vocab_size + 1 rows).
        emb_size: word embedding dimension.
        stack_num: number of stacked self-attention blocks.
        channel1_num: first conv channel count for the 3D CNN.
        channel2_num: second conv channel count for the 3D CNN.
    """

    def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
                 stack_num, channel1_num, channel2_num):
        self._max_turn_num = max_turn_num
        self._max_turn_len = max_turn_len
        self._vocab_size = vocab_size
        self._emb_size = emb_size
        self._stack_num = stack_num
        self._channel1_num = channel1_num
        self._channel2_num = channel2_num
        # Feed-name list is (re)populated by create_data_layers().
        self._feed_names = []
        # Single shared parameter name so all embedding lookups (turns and
        # response) reuse one word-embedding table.
        self.word_emb_name = "shared_word_emb"
        # Graph-construction switches: use fluid's stack op (vs.
        # unsqueeze+concat), cache attention masks across blocks, and use
        # sparse gradient updates for the embedding table.
        self.use_stack_op = True
        self.use_mask_cache = True
        self.use_sparse_embedding = True

    def create_py_reader(self, capacity, name):
        """Create a double-buffered py_reader feeding the whole input set.

        The reader yields, in order: `max_turn_num` turn-id tensors,
        `max_turn_num` turn-mask tensors, then response ids, response mask
        and the label. The resulting data vars are stored on `self` for
        create_network() to consume.

        Args:
            capacity: queue capacity of the py_reader.
            name: unique name for the reader op.

        Returns:
            The created py_reader variable (caller decorates/starts it).
        """
        # turns ids
        shapes = [[-1, self._max_turn_len, 1]
                  for i in six.moves.xrange(self._max_turn_num)]
        dtypes = ["int32" for i in six.moves.xrange(self._max_turn_num)]
        # turns mask
        shapes += [[-1, self._max_turn_len, 1]
                   for i in six.moves.xrange(self._max_turn_num)]
        dtypes += ["float32" for i in six.moves.xrange(self._max_turn_num)]

        # response ids, response mask, label
        shapes += [[-1, self._max_turn_len, 1], [-1, self._max_turn_len, 1],
                   [-1, 1]]
        dtypes += ["int32", "float32", "float32"]

        py_reader = fluid.layers.py_reader(
            capacity=capacity,
            shapes=shapes,
            # All inputs are dense (no LoD): 2 tensors per turn + 3 extras.
            lod_levels=[0] * (2 * self._max_turn_num + 3),
            dtypes=dtypes,
            name=name,
            use_double_buffer=True)

        data_vars = fluid.layers.read_file(py_reader)

        # Slice the flat var list back into the named groups declared above.
        self.turns_data = data_vars[0:self._max_turn_num]
        self.turns_mask = data_vars[self._max_turn_num:2 * self._max_turn_num]
        self.response = data_vars[-3]
        self.response_mask = data_vars[-2]
        self.label = data_vars[-1]
        return py_reader

    def create_data_layers(self):
        """Create feed-style data layers (alternative to create_py_reader).

        Defines one id layer and one mask layer per turn, plus response,
        response_mask and label layers, and records their names in
        `self._feed_names` in feed order (see get_feed_names()).
        """
        self._feed_names = []

        self.turns_data = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_%d" % i
            turn = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="int32")
            self.turns_data.append(turn)
            self._feed_names.append(name)

        self.turns_mask = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_mask_%d" % i
            turn_mask = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="float32")
            self.turns_mask.append(turn_mask)
            self._feed_names.append(name)

        self.response = fluid.layers.data(
            name="response", shape=[self._max_turn_len, 1], dtype="int32")
        self.response_mask = fluid.layers.data(
            name="response_mask",
            shape=[self._max_turn_len, 1],
            dtype="float32")
        self.label = fluid.layers.data(name="label", shape=[1], dtype="float32")
        self._feed_names += ["response", "response_mask", "label"]

    def get_feed_names(self):
        """Return feed names in the order create_data_layers() declared them
        (empty until create_data_layers() has been called)."""
        return self._feed_names

    def set_word_embedding(self, word_emb, place):
        """Overwrite the shared word-embedding parameter with `word_emb`.

        Args:
            word_emb: numpy array matching the embedding parameter's shape
                ([vocab_size + 1, emb_size] — confirm against checkpoint).
            place: fluid place (CPU/GPU) the tensor should be set on.
        """
        word_emb_param = fluid.global_scope().find_var(
            self.word_emb_name).get_tensor()
        word_emb_param.set(word_emb, place)

    def create_network(self):
        """Build the full matching graph and return (loss, logits).

        Requires the input variables created by create_py_reader() or
        create_data_layers() to exist on `self`.
        """
        # One shared mask cache lets layers.block reuse computed attention
        # masks across the many block invocations below.
        mask_cache = dict() if self.use_mask_cache else None

        response_emb = fluid.layers.embedding(
            input=self.response,
            # +1 row: presumably reserves an id for padding/OOV — TODO confirm.
            size=[self._vocab_size + 1, self._emb_size],
            is_sparse=self.use_sparse_embedding,
            param_attr=fluid.ParamAttr(
                name=self.word_emb_name,
                initializer=fluid.initializer.Normal(scale=0.1)))

        # response part: self-attention stack over the response; keep every
        # intermediate representation (including the raw embedding).
        Hr = response_emb
        Hr_stack = [Hr]

        for index in six.moves.xrange(self._stack_num):
            Hr = layers.block(
                name="response_self_stack" + str(index),
                query=Hr,
                key=Hr,
                value=Hr,
                d_key=self._emb_size,
                q_mask=self.response_mask,
                k_mask=self.response_mask,
                mask_cache=mask_cache)
            Hr_stack.append(Hr)

        # context part
        sim_turns = []
        for t in six.moves.xrange(self._max_turn_num):
            Hu = fluid.layers.embedding(
                input=self.turns_data[t],
                size=[self._vocab_size + 1, self._emb_size],
                is_sparse=self.use_sparse_embedding,
                # Same param name as the response embedding -> shared table.
                param_attr=fluid.ParamAttr(
                    name=self.word_emb_name,
                    initializer=fluid.initializer.Normal(scale=0.1)))
            Hu_stack = [Hu]

            for index in six.moves.xrange(self._stack_num):
                # share parameters: block names do not include the turn
                # index, so every turn reuses the same self-attention weights.
                Hu = layers.block(
                    name="turn_self_stack" + str(index),
                    query=Hu,
                    key=Hu,
                    value=Hu,
                    d_key=self._emb_size,
                    q_mask=self.turns_mask[t],
                    k_mask=self.turns_mask[t],
                    mask_cache=mask_cache)
                Hu_stack.append(Hu)

            # cross attention: turn-attends-response and response-attends-turn
            # at every stack depth (stack_num + 1 includes the embeddings).
            r_a_t_stack = []
            t_a_r_stack = []
            for index in six.moves.xrange(self._stack_num + 1):
                t_a_r = layers.block(
                    name="t_attend_r_" + str(index),
                    query=Hu_stack[index],
                    key=Hr_stack[index],
                    value=Hr_stack[index],
                    d_key=self._emb_size,
                    q_mask=self.turns_mask[t],
                    k_mask=self.response_mask,
                    mask_cache=mask_cache)
                r_a_t = layers.block(
                    name="r_attend_t_" + str(index),
                    query=Hr_stack[index],
                    key=Hu_stack[index],
                    value=Hu_stack[index],
                    d_key=self._emb_size,
                    q_mask=self.response_mask,
                    k_mask=self.turns_mask[t],
                    mask_cache=mask_cache)

                t_a_r_stack.append(t_a_r)
                r_a_t_stack.append(r_a_t)

            # Also match the self-attention representations themselves.
            t_a_r_stack.extend(Hu_stack)
            r_a_t_stack.extend(Hr_stack)

            if self.use_stack_op:
                t_a_r = fluid.layers.stack(t_a_r_stack, axis=1)
                r_a_t = fluid.layers.stack(r_a_t_stack, axis=1)
            else:
                # Fallback path: emulate stack with unsqueeze + concat.
                for index in six.moves.xrange(len(t_a_r_stack)):
                    t_a_r_stack[index] = fluid.layers.unsqueeze(
                        input=t_a_r_stack[index], axes=[1])
                    r_a_t_stack[index] = fluid.layers.unsqueeze(
                        input=r_a_t_stack[index], axes=[1])

                t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
                r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)

            # sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
            # NOTE(review): the 1/sqrt(200) scaling looks like scaled
            # dot-product attention with a hard-coded d_key of 200
            # (presumably the trained emb_size) — confirm before changing.
            sim = fluid.layers.matmul(
                x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
            sim_turns.append(sim)

        # Stack per-turn similarity maps along a new "turn" depth axis.
        if self.use_stack_op:
            sim = fluid.layers.stack(sim_turns, axis=2)
        else:
            for index in six.moves.xrange(len(sim_turns)):
                sim_turns[index] = fluid.layers.unsqueeze(
                    input=sim_turns[index], axes=[2])
            # sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
            sim = fluid.layers.concat(input=sim_turns, axis=2)

        # Aggregate the similarity volume with a 3D CNN and score it.
        final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
        loss, logits = layers.loss(final_info, self.label)
        return loss, logits