# model.py
import cPickle as pickle
import numpy as np
import paddle.fluid as fluid
import utils.layers as layers


class Net(object):
    """Deep Attention Matching (DAM)-style network for multi-turn response
    selection.

    The network embeds each context turn and the candidate response,
    refines both with stacked self-attention blocks, cross-attends turns
    and response in both directions, stacks the resulting similarity maps
    into a 4-D "image", and classifies it with a 3-D CNN.
    """

    def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
                 stack_num):
        """Store model hyper-parameters.

        Args:
            max_turn_num: number of context turns fed to the model.
            max_turn_len: maximum token length of each turn / response.
            vocab_size: vocabulary size (embedding table has one extra row).
            emb_size: word-embedding / attention hidden dimension.
            stack_num: number of stacked self-attention blocks.
        """
        self._max_turn_num = max_turn_num
        self._max_turn_len = max_turn_len
        self._vocab_size = vocab_size
        self._emb_size = emb_size
        self._stack_num = stack_num
        # Single shared embedding table used for both turns and response.
        self.word_emb_name = "shared_word_emb"

    def create_network(self):
        """Build the fluid graph; returns (loss, logits) tensors."""
        # One data layer per context turn: token ids, shape [max_turn_len, 1].
        turns_data = []
        for i in range(self._max_turn_num):
            turn = fluid.layers.data(
                name="turn_%d" % i,
                shape=[self._max_turn_len, 1],
                dtype="int32")
            turns_data.append(turn)

        # Matching float masks (1 for real tokens, presumably 0 for padding
        # — set by the feed pipeline; confirm against the reader).
        turns_mask = []
        for i in range(self._max_turn_num):
            turn_mask = fluid.layers.data(
                name="turn_mask_%d" % i,
                shape=[self._max_turn_len],
                dtype="float32")
            turns_mask.append(turn_mask)

        response = fluid.layers.data(
            name="response", shape=[self._max_turn_len, 1], dtype="int32")
        response_mask = fluid.layers.data(
            name="response_mask", shape=[self._max_turn_len], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1], dtype="float32")

        response_emb = fluid.layers.embedding(
            input=response,
            size=[self._vocab_size + 1, self._emb_size],
            param_attr=fluid.ParamAttr(
                name=self.word_emb_name,
                initializer=fluid.initializer.Normal(scale=0.1)))

        # Response part: raw embedding plus stack_num self-attention
        # refinements, all kept for later cross-attention.
        Hr = response_emb
        Hr_stack = [Hr]

        for index in range(self._stack_num):
            Hr = layers.block(
                name="response_self_stack" + str(index),
                query=Hr,
                key=Hr,
                value=Hr,
                d_key=self._emb_size,
                q_mask=response_mask,
                k_mask=response_mask)
            Hr_stack.append(Hr)

        # Context part: repeat the same construction per turn.
        sim_turns = []
        for t in range(self._max_turn_num):
            Hu = fluid.layers.embedding(
                input=turns_data[t],
                size=[self._vocab_size + 1, self._emb_size],
                param_attr=fluid.ParamAttr(
                    name=self.word_emb_name,  # share the embedding table
                    initializer=fluid.initializer.Normal(scale=0.1)))
            Hu_stack = [Hu]

            for index in range(self._stack_num):
                # Parameters are shared across turns via the fixed block name.
                Hu = layers.block(
                    name="turn_self_stack" + str(index),
                    query=Hu,
                    key=Hu,
                    value=Hu,
                    d_key=self._emb_size,
                    q_mask=turns_mask[t],
                    k_mask=turns_mask[t])
                Hu_stack.append(Hu)

            # Cross attention in both directions at every stack depth.
            r_a_t_stack = []
            t_a_r_stack = []
            for index in range(self._stack_num + 1):
                t_a_r = layers.block(
                    name="t_attend_r_" + str(index),
                    query=Hu_stack[index],
                    key=Hr_stack[index],
                    value=Hr_stack[index],
                    d_key=self._emb_size,
                    q_mask=turns_mask[t],
                    k_mask=response_mask)
                r_a_t = layers.block(
                    name="r_attend_t_" + str(index),
                    query=Hr_stack[index],
                    key=Hu_stack[index],
                    value=Hu_stack[index],
                    d_key=self._emb_size,
                    q_mask=response_mask,
                    k_mask=turns_mask[t])

                t_a_r_stack.append(t_a_r)
                r_a_t_stack.append(r_a_t)

            # Include the self-attention representations alongside the
            # cross-attention ones before building similarity maps.
            t_a_r_stack.extend(Hu_stack)
            r_a_t_stack.extend(Hr_stack)

            # Add a channel axis so the representations can be concatenated.
            for index in range(len(t_a_r_stack)):
                t_a_r_stack[index] = fluid.layers.unsqueeze(
                    input=t_a_r_stack[index], axes=[1])
                r_a_t_stack[index] = fluid.layers.unsqueeze(
                    input=r_a_t_stack[index], axes=[1])

            t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
            r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)

            # sim shape: [batch_size, 2*(stack_num+2), max_turn_len, max_turn_len]
            sim = fluid.layers.matmul(x=t_a_r, y=r_a_t, transpose_y=True)
            # NOTE(review): scaled dot-product with a hard-coded dimension;
            # 200.0 presumably equals emb_size — confirm before changing
            # emb_size, otherwise the scale no longer matches 1/sqrt(d).
            sim = fluid.layers.scale(x=sim, scale=1 / np.sqrt(200.0))
            sim_turns.append(sim)

        # Add a turn axis, then stack all per-turn similarity "images".
        for index in range(len(sim_turns)):
            sim_turns[index] = fluid.layers.unsqueeze(
                input=sim_turns[index], axes=[2])
        # sim shape: [batch_size, 2*(stack_num+2), max_turn_num, max_turn_len, max_turn_len]
        sim = fluid.layers.concat(input=sim_turns, axis=2)

        # For douban: 3-D CNN channel sizes (16, 16) are dataset-tuned.
        final_info = layers.cnn_3d(sim, 16, 16)
        loss, logits = layers.loss(final_info, label)
        return loss, logits