#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Deep Attention Matching Network
"""

import six
import numpy as np
import paddle.fluid as fluid
import layers


class Net(object):
    """
    Deep Attention Matching Network (DAM).

    Builds the Paddle Fluid graph that scores a response candidate
    against a multi-turn dialogue context: stacked self-attention over
    each turn and over the response, cross-attention between every
    turn and the response, then a 3D CNN over the stacked similarity
    maps followed by a binary matching loss.
    """

    def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
                 stack_num, channel1_num, channel2_num):
        """
        Init.

        Args:
            max_turn_num: number of context turns fed to the model.
            max_turn_len: maximum number of tokens per turn/response.
            vocab_size: vocabulary size; the embedding table holds
                vocab_size + 1 rows (extra row presumably for the
                padding/OOV id — confirm against the data reader).
            emb_size: word embedding dimension.
            stack_num: number of stacked self-attention blocks.
            channel1_num: channel count of the first 3D-conv layer.
            channel2_num: channel count of the second 3D-conv layer.
        """
        self._max_turn_num = max_turn_num
        self._max_turn_len = max_turn_len
        self._vocab_size = vocab_size
        self._emb_size = emb_size
        self._stack_num = stack_num
        self._channel1_num = channel1_num
        self._channel2_num = channel2_num
        # Filled by create_data_layers(); order defines the feed order.
        self._feed_names = []
        # Single shared parameter name so every embedding lookup (turns
        # and response) reuses the same word-embedding table.
        self.word_emb_name = "shared_word_emb"
        self.use_stack_op = True
        self.use_mask_cache = True
        self.use_sparse_embedding = True

    def create_py_reader(self, capacity, name):
        """
        Create a py_reader producing turn ids, turn masks, response ids,
        response mask and label, and bind the resulting variables to
        this instance (turns_data, turns_mask, response, response_mask,
        label).

        Args:
            capacity: queue capacity of the py_reader.
            name: unique name for the reader op.

        Returns:
            The created py_reader; feed it via its decorated provider.
        """
        # turns ids
        shapes = [[-1, self._max_turn_len, 1]
                  for i in six.moves.xrange(self._max_turn_num)]
        dtypes = ["int64" for i in six.moves.xrange(self._max_turn_num)]
        # turns mask
        shapes += [[-1, self._max_turn_len, 1]
                   for i in six.moves.xrange(self._max_turn_num)]
        dtypes += ["float32" for i in six.moves.xrange(self._max_turn_num)]

        # response ids, response mask, label
        shapes += [[-1, self._max_turn_len, 1], [-1, self._max_turn_len, 1],
                   [-1, 1]]
        dtypes += ["int64", "float32", "float32"]

        py_reader = fluid.layers.py_reader(
            capacity=capacity,
            shapes=shapes,
            # 2 * max_turn_num tensors for turns/masks + 3 for response,
            # response mask and label; all are dense (lod level 0).
            lod_levels=[0] * (2 * self._max_turn_num + 3),
            dtypes=dtypes,
            name=name,
            use_double_buffer=True)

        data_vars = fluid.layers.read_file(py_reader)

        self.turns_data = data_vars[0:self._max_turn_num]
        self.turns_mask = data_vars[self._max_turn_num:2 * self._max_turn_num]
        self.response = data_vars[-3]
        self.response_mask = data_vars[-2]
        self.label = data_vars[-1]
        return py_reader

    def create_data_layers(self):
        """
        Create plain data layers (the feed-based alternative to
        create_py_reader) and record their names in self._feed_names.
        """
        self._feed_names = []

        self.turns_data = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_%d" % i
            turn = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="int64")
            self.turns_data.append(turn)
            self._feed_names.append(name)

        self.turns_mask = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_mask_%d" % i
            turn_mask = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="float32")
            self.turns_mask.append(turn_mask)
            self._feed_names.append(name)

        self.response = fluid.layers.data(
            name="response", shape=[self._max_turn_len, 1], dtype="int64")
        self.response_mask = fluid.layers.data(
            name="response_mask",
            shape=[self._max_turn_len, 1],
            dtype="float32")
        self.label = fluid.layers.data(name="label", shape=[1], dtype="float32")
        self._feed_names += ["response", "response_mask", "label"]

    def get_feed_names(self):
        """
        Return feed names (in feeding order); empty unless
        create_data_layers() has been called.
        """
        return self._feed_names

    def set_word_embedding(self, word_emb, place):
        """
        Overwrite the shared word-embedding parameter with a
        pre-trained matrix.

        Args:
            word_emb: numpy array shaped like the embedding table
                ([vocab_size + 1, emb_size]).
            place: fluid place (CPUPlace/CUDAPlace) holding the tensor.
        """
        word_emb_param = fluid.global_scope().find_var(
            self.word_emb_name).get_tensor()
        word_emb_param.set(word_emb, place)

    def create_network(self):
        """
        Build the forward graph.

        Requires create_py_reader() or create_data_layers() to have
        been called first (uses self.turns_data, self.turns_mask,
        self.response, self.response_mask, self.label).

        Returns:
            (loss, logits) computed by layers.loss over the 3D-CNN
            matching features.
        """
        mask_cache = dict() if self.use_mask_cache else None

        response_emb = fluid.layers.embedding(
            input=self.response,
            size=[self._vocab_size + 1, self._emb_size],
            is_sparse=self.use_sparse_embedding,
            param_attr=fluid.ParamAttr(
                name=self.word_emb_name,
                initializer=fluid.initializer.Normal(scale=0.1)))

        # response part
        Hr = response_emb
        Hr_stack = [Hr]

        for index in six.moves.xrange(self._stack_num):
            Hr = layers.block(
                name="response_self_stack" + str(index),
                query=Hr,
                key=Hr,
                value=Hr,
                d_key=self._emb_size,
                q_mask=self.response_mask,
                k_mask=self.response_mask,
                mask_cache=mask_cache)
            Hr_stack.append(Hr)

        # context part
        sim_turns = []
        for t in six.moves.xrange(self._max_turn_num):
            Hu = fluid.layers.embedding(
                input=self.turns_data[t],
                size=[self._vocab_size + 1, self._emb_size],
                is_sparse=self.use_sparse_embedding,
                param_attr=fluid.ParamAttr(
                    name=self.word_emb_name,
                    initializer=fluid.initializer.Normal(scale=0.1)))
            Hu_stack = [Hu]

            for index in six.moves.xrange(self._stack_num):
                # share parameters
                Hu = layers.block(
                    name="turn_self_stack" + str(index),
                    query=Hu,
                    key=Hu,
                    value=Hu,
                    d_key=self._emb_size,
                    q_mask=self.turns_mask[t],
                    k_mask=self.turns_mask[t],
                    mask_cache=mask_cache)
                Hu_stack.append(Hu)

            # cross attention
            r_a_t_stack = []
            t_a_r_stack = []
            for index in six.moves.xrange(self._stack_num + 1):
                t_a_r = layers.block(
                    name="t_attend_r_" + str(index),
                    query=Hu_stack[index],
                    key=Hr_stack[index],
                    value=Hr_stack[index],
                    d_key=self._emb_size,
                    q_mask=self.turns_mask[t],
                    k_mask=self.response_mask,
                    mask_cache=mask_cache)
                r_a_t = layers.block(
                    name="r_attend_t_" + str(index),
                    query=Hr_stack[index],
                    key=Hu_stack[index],
                    value=Hu_stack[index],
                    d_key=self._emb_size,
                    q_mask=self.response_mask,
                    k_mask=self.turns_mask[t],
                    mask_cache=mask_cache)

                t_a_r_stack.append(t_a_r)
                r_a_t_stack.append(r_a_t)

            # Self-attention representations join the cross-attention
            # ones so the similarity cube covers both.
            t_a_r_stack.extend(Hu_stack)
            r_a_t_stack.extend(Hr_stack)

            if self.use_stack_op:
                t_a_r = fluid.layers.stack(t_a_r_stack, axis=1)
                r_a_t = fluid.layers.stack(r_a_t_stack, axis=1)
            else:
                # Fallback: emulate stack with unsqueeze + concat.
                for index in six.moves.xrange(len(t_a_r_stack)):
                    t_a_r_stack[index] = fluid.layers.unsqueeze(
                        input=t_a_r_stack[index], axes=[1])
                    r_a_t_stack[index] = fluid.layers.unsqueeze(
                        input=r_a_t_stack[index], axes=[1])

                t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
                r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)

            # sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
            sim = fluid.layers.matmul(
                x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
            sim_turns.append(sim)

        if self.use_stack_op:
            sim = fluid.layers.stack(sim_turns, axis=2)
        else:
            for index in six.moves.xrange(len(sim_turns)):
                sim_turns[index] = fluid.layers.unsqueeze(
                    input=sim_turns[index], axes=[2])
            # sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
            sim = fluid.layers.concat(input=sim_turns, axis=2)

        final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
        loss, logits = layers.loss(final_info, self.label)
        return loss, logits