from paddle import v2 as paddle
from paddle.v2.attr import ParamAttr
from utils import TaskType, logger, ModelType, ModelArch


class DSSM(object):
    def __init__(self,
                 dnn_dims=[],
                 vocab_sizes=[],
                 model_type=ModelType.create_classification(),
                 model_arch=ModelArch.create_cnn(),
                 share_semantic_generator=False,
                 class_num=None,
                 share_embed=False,
                 is_infer=False):
        """
        :param dnn_dims: The dimension of each layer in the semantic vector
                         generator.
        :type dnn_dims: list of int
        :param vocab_sizes: The vocabulary sizes of the left and right inputs.
        :type vocab_sizes: A list having 2 elements.
        :param model_type: The type of task to train the DSSM model. The value
                           should be "rank: 0", "regression: 1" or
                           "classification: 2".
        :type model_type: int
        :param model_arch: A value indicating the model architecture to use.
        :type model_arch: int
        :param share_semantic_generator: A flag indicating whether to share the
                                         semantic vector generator between the
                                         left and the right item.
        :type share_semantic_generator: bool
        :param share_embed: A flag indicating whether to share the embeddings
                            between the left and the right item.
        :type share_embed: bool
        :param class_num: The number of categories.
        :type class_num: int
        """
        assert len(vocab_sizes) == 2, (
            "The vocab_sizes specifying the sizes left and right inputs. "
            "Its dimension should be 2.")
        assert len(dnn_dims) > 1, ("In the DNN model, more than two layers "
                                   "are needed.")

        self.dnn_dims = dnn_dims
        self.vocab_sizes = vocab_sizes
        self.share_semantic_generator = share_semantic_generator
        self.share_embed = share_embed
        self.model_type = ModelType(model_type)
        self.model_arch = ModelArch(model_arch)
        self.class_num = class_num
        self.is_infer = is_infer
        logger.warning("Build DSSM model with config of %s, %s" %
                       (self.model_type, self.model_arch))
        logger.info("The vocabulary size is : %s" % str(self.vocab_sizes))

        # bind model architecture
        _model_arch = {
            "cnn": self.create_cnn,
            "fc": self.create_fc,
            "rnn": self.create_rnn,
        }

        def _model_arch_creater(emb, prefix=""):
            sent_vec = _model_arch.get(str(model_arch))(emb, prefix)
            dnn = self.create_dnn(sent_vec, prefix)
            return dnn

        self.model_arch_creater = _model_arch_creater

        _model_type = {
            "classification": self._build_classification_model,
            "rank": self._build_rank_model,
            "regression": self._build_regression_model,
        }
        print("model type: ", str(self.model_type))
        self.model_type_creater = _model_type[str(self.model_type)]

    def __call__(self):
        return self.model_type_creater()

    def create_embedding(self, input, prefix=""):
        """
        Create word embedding. The `prefix` is added in front of the name of
        embedding"s learnable parameter.
        """
        logger.info("Create embedding table [%s] whose dimention is %d. " %
                    (prefix, self.dnn_dims[0]))
        emb = paddle.layer.embedding(
            input=input,
            size=self.dnn_dims[0],
            param_attr=ParamAttr(name="%s_emb.w" % prefix))
        return emb

    def create_fc(self, emb, prefix=""):
        """
        A multi-layer fully connected neural network.

        :param emb: The output of the embedding layer
        :type emb: paddle.layer
        :param prefix: A prefix will be added to the layers' names.
        :type prefix: str
        """
        _input_layer = paddle.layer.pooling(
            input=emb, pooling_type=paddle.pooling.Max())
        fc = paddle.layer.fc(
            input=_input_layer,
            size=self.dnn_dims[1],
            param_attr=ParamAttr(name="%s_fc.w" % prefix),
            bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.))
        return fc

    def create_rnn(self, emb, prefix=""):
        """
        A GRU sentence vector learner.
        """
        gru = paddle.networks.simple_gru(
            input=emb,
            size=self.dnn_dims[1],
            mixed_param_attr=ParamAttr(name="%s_gru_mixed.w" % prefix),
            mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix),
            gru_param_attr=ParamAttr(name="%s_gru.w" % prefix),
            gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix))
        sent_vec = paddle.layer.last_seq(gru)
        return sent_vec

    def create_cnn(self, emb, prefix=""):
        """
        A CNN built from two sequence convolution + pooling groups whose
        context widths are 3 and 4.

        :param emb: The word embedding.
        :type emb: paddle.layer
        :param prefix: A prefix that will be added to the layers' names.
        :type prefix: str
        """

        def create_conv(context_len, hidden_size, prefix):
            key = "%s_%d_%d" % (prefix, context_len, hidden_size)
            conv = paddle.networks.sequence_conv_pool(
                input=emb,
                context_len=context_len,
                hidden_size=hidden_size,
                # set parameter attr for parameter sharing
                context_proj_param_attr=ParamAttr(name=key + "_context_proj.w"),
                fc_param_attr=ParamAttr(name=key + "_fc.w"),
                fc_bias_attr=ParamAttr(name=key + "_fc.b"),
                pool_bias_attr=ParamAttr(name=key + "_pool.b"))
            return conv

        logger.info("create a sequence_conv_pool which context width is 3")
        conv_3 = create_conv(3, self.dnn_dims[1], "cnn")
        logger.info("create a sequence_conv_pool which context width is 4")
        conv_4 = create_conv(4, self.dnn_dims[1], "cnn")

        return conv_3, conv_4

    def create_dnn(self, sent_vec, prefix):
        # A fully connected layer is stacked for each dimension in
        # dnn_dims[1:].
        if len(self.dnn_dims) > 1:
            _input_layer = sent_vec
            for id, dim in enumerate(self.dnn_dims[1:]):
                name = "%s_fc_%d_%d" % (prefix, id, dim)
                logger.info("create fc layer [%s] which dimention is %d" %
                            (name, dim))
                fc = paddle.layer.fc(
                    input=_input_layer,
                    size=dim,
                    act=paddle.activation.Tanh(),
                    param_attr=ParamAttr(name="%s.w" % name),
                    bias_attr=ParamAttr(name="%s.b" % name, initial_std=0.))
                _input_layer = fc
        return _input_layer

    def _build_classification_model(self):
        logger.info("build classification model")
        assert self.model_type.is_classification()
        return self._build_classification_or_regression_model(
            is_classification=True)

    def _build_regression_model(self):
        logger.info("build regression model")
        assert self.model_type.is_regression()
        return self._build_classification_or_regression_model(
            is_classification=False)

    def _build_rank_model(self):
        """
        Build a pairwise rank model, and the cost is returned.

        A pairwise rank model takes 4 inputs:
          - source sentence
          - left_target sentence
          - right_target sentence
          - label, 1 if left_target should be ranked in front of
                   right_target, otherwise 0 (only used in training).
        """
        logger.info("build rank model")
        assert self.model_type.is_rank()
        source = paddle.layer.data(
            name="source_input",
            type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
        left_target = paddle.layer.data(
            name="left_target_input",
            type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
        right_target = paddle.layer.data(
            name="right_target_input",
            type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
        if not self.is_infer:
            label = paddle.layer.data(
                name="label_input", type=paddle.data_type.integer_value(1))

        prefixs = "_ _ _".split(
        ) if self.share_semantic_generator else "source target target".split()
        embed_prefixs = "_ _".split(
        ) if self.share_embed else "source target target".split()

        word_vecs = []
        for id, input in enumerate([source, left_target, right_target]):
            x = self.create_embedding(input, prefix=embed_prefixs[id])
            word_vecs.append(x)

        semantics = []
        for id, input in enumerate(word_vecs):
            x = self.model_arch_creater(input, prefix=prefixs[id])
            semantics.append(x)

        # The cosine similarity score of source and left_target.
        left_score = paddle.layer.cos_sim(semantics[0], semantics[1])
        # The cosine similarity score of source and right target.
        right_score = paddle.layer.cos_sim(semantics[0], semantics[2])

        if not self.is_infer:
            # rank cost
            cost = paddle.layer.rank_cost(left_score, right_score, label=label)
            # Ideally, prediction = left_score - right_score, but this
            # operator is not supported currently, so the AUC metric is not
            # computed.
            return cost, None, label
        return right_score

    def _build_classification_or_regression_model(self, is_classification):
        """
        Build a classification/regression model, and the cost is returned.

        The classification/regression task expects 3 inputs:
          - source sentence
          - target sentence
          - label (a class id for classification, or a real value for
            regression)

        """
        if is_classification:
            assert self.class_num

        source = paddle.layer.data(
            name="source_input",
            type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
        target = paddle.layer.data(
            name="target_input",
            type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
        label = paddle.layer.data(
            name="label_input",
            type=paddle.data_type.integer_value(self.class_num)
            if is_classification else paddle.data_type.dense_vector(1))

        prefixs = "_ _".split(
        ) if self.share_semantic_generator else "source target".split()
        embed_prefixs = "_ _".split(
        ) if self.share_embed else "source target".split()

        word_vecs = []
        for id, input in enumerate([source, target]):
            x = self.create_embedding(input, prefix=embed_prefixs[id])
            word_vecs.append(x)

        semantics = []
        for id, input in enumerate(word_vecs):
            x = self.model_arch_creater(input, prefix=prefixs[id])
            semantics.append(x)

        if is_classification:
            concated_vector = paddle.layer.concat(semantics)
            prediction = paddle.layer.fc(
                input=concated_vector,
                size=self.class_num,
                act=paddle.activation.Softmax())
            cost = paddle.layer.classification_cost(
                input=prediction, label=label)
        else:
            prediction = paddle.layer.cos_sim(*semantics)
            cost = paddle.layer.square_error_cost(prediction, label)

        if not self.is_infer:
            return cost, prediction, label
        return prediction
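

# A minimal construction sketch: it shows how the DSSM network above might be
# assembled and called. The dimensions, vocabulary sizes and class number
# below are hypothetical placeholders, not values from any actual
# configuration.
if __name__ == "__main__":
    paddle.init(use_gpu=False, trainer_count=1)
    dssm = DSSM(
        dnn_dims=[256, 128, 64, 32],
        vocab_sizes=[10000, 10000],
        model_type=ModelType.create_classification(),
        model_arch=ModelArch.create_cnn(),
        class_num=2)
    # For a classification model, calling the object returns
    # (cost, prediction, label).
    cost, prediction, label = dssm()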