# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
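
# Integration test for IMDB sentiment classification with the legacy fluid
# static-graph API: trains a text-CNN or a stacked-LSTM classifier, saves an
# inference model, and reloads it for inference.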

import contextlib
import math
import os
import sys
import unittest

import numpy as np

import paddle
import paddle.fluid as fluid

# The fluid graph-building calls below assume static-graph mode, which must be
# enabled explicitly under paddle >= 2.0 (the versions that provide
# paddle.static.accuracy used in this file).
paddle.enable_static()


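# Text-CNN: embed the word ids, run two sequence-conv-pool branches with
# filter sizes 3 and 4, and classify the pooled features with a softmax fc.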
def convolution_net(
    data, label, input_dim, class_dim=2, emb_dim=32, hid_dim=32
):
    emb = fluid.layers.embedding(
        input=data, size=[input_dim, emb_dim], is_sparse=True
    )
    conv_3 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=3,
        act="tanh",
        pool_type="sqrt",
    )
    conv_4 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=4,
        act="tanh",
        pool_type="sqrt",
    )
    prediction = fluid.layers.fc(
        input=[conv_3, conv_4], size=class_dim, act="softmax"
    )
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = paddle.mean(cost)
    accuracy = paddle.static.accuracy(input=prediction, label=label)
    return avg_cost, accuracy, prediction


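# Stacked LSTM: alternating forward/reverse dynamic_lstm layers (stacked_num
# must be odd), max-pooled and classified with a softmax fc.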
def stacked_lstm_net(
    data, label, input_dim, class_dim=2, emb_dim=128, hid_dim=512, stacked_num=3
):
    assert stacked_num % 2 == 1

    emb = fluid.layers.embedding(
        input=data, size=[input_dim, emb_dim], is_sparse=True
    )
    # add bias attr

    # TODO(qijun) linear act
    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)

    inputs = [fc1, lstm1]

    # Stack the remaining layers; even-numbered layers run the LSTM in
    # reverse so the direction alternates through the stack.
    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hid_dim)
        lstm, cell = fluid.layers.dynamic_lstm(
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0
        )
        inputs = [fc, lstm]

    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    prediction = fluid.layers.fc(
        input=[fc_last, lstm_last], size=class_dim, act='softmax'
    )
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = paddle.mean(cost)
    accuracy = paddle.static.accuracy(input=prediction, label=label)
    return avg_cost, accuracy, prediction


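# Builds the chosen network, trains it with Adagrad on batched IMDB data, and
# optionally runs distributed training via the DistributeTranspiler.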
def train(
    word_dict,
    net_method,
    use_cuda,
    parallel=False,
    save_dirname=None,
    is_local=True,
):
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

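    # `words` is a variable-length sequence of word ids (lod_level=1).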
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1
    )
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim
        )
    else:
        raise NotImplementedError()

    adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

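    # Shuffle the IMDB reader within a 1000-example buffer, then batch it.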
    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000
        ),
        batch_size=BATCH_SIZE,
    )
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

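    # Run up to PASS_NUM epochs, stopping early once cost < 0.4 and
    # accuracy > 0.8; save an inference model at that point if requested.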
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            # `batch` avoids shadowing the `data` input layer defined above.
            for batch in train_data():
                cost_val, acc_val = exe.run(
                    main_program,
                    feed=feeder.feed(batch),
                    fetch_list=[cost, acc_out],
                )
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname, ["words"], prediction, exe
                        )
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError(
            "Cost is too large for {0}".format(net_method.__name__)
        )

    if is_local:
        train_loop(fluid.default_main_program())
    else:
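        # Endpoints and roles come from environment variables set by the
        # distributed job launcher.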
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


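# Reloads the saved inference model and runs it on a random batch of word-id
# sequences.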
def infer(word_dict, use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        word_dict_len = len(word_dict)

        # Set up the input by creating a LoDTensor to represent a sequence of words.
        # Here each word is the basic element of the LoDTensor and the shape of
        # each word (base_shape) should be [1] since it is simply an index to
        # look up the corresponding word vector.
        # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
        # which has only one level of detail. Then the created LoDTensor will have only
        # one higher level structure (sequence of words, or sentence) than the basic
        # element (word). Hence the LoDTensor will hold data for three sentences of
        # length 3, 4 and 2, respectively.
        # Note that recursive_sequence_lengths should be a list of lists.
        recursive_seq_lens = [[3, 4, 2]]
        base_shape = [1]
        # The range of random integers is [low, high]
        tensor_words = fluid.create_random_int_lodtensor(
            recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
        )

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        assert feed_target_names[0] == "words"
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_words},
            fetch_list=fetch_targets,
            return_numpy=False,
        )
        print(results[0].recursive_sequence_lengths())
        np_data = np.array(results[0])
        print("Inference Shape: ", np_data.shape)
        print("Inference results: ", np_data)


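# Skips silently when CUDA is requested but paddle was built without GPU
# support; otherwise trains and then runs inference.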
def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    train(
        word_dict,
        net_method,
        use_cuda,
        parallel=parallel,
        save_dirname=save_dirname,
    )
    infer(word_dict, use_cuda, save_dirname)


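# Each test builds its graph in a fresh program and scope (new_program_scope)
# so the cases do not interfere with one another.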
class TestUnderstandSentiment(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.word_dict = paddle.dataset.imdb.word_dict()

    @contextlib.contextmanager
    def new_program_scope(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield

    def test_conv_cpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=False,
                save_dirname="understand_sentiment_conv.inference.model",
            )

    def test_conv_cpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=False,
                parallel=True,
            )

    @unittest.skip(reason="make CI faster")
    def test_stacked_lstm_cpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=stacked_lstm_net,
                use_cuda=False,
                save_dirname="understand_sentiment_stacked_lstm.inference.model",
            )

    def test_stacked_lstm_cpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=stacked_lstm_net,
                use_cuda=False,
                parallel=True,
            )

    def test_conv_gpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=True,
                save_dirname="understand_sentiment_conv.inference.model",
            )

    def test_conv_gpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=True,
                parallel=True,
            )

    @unittest.skip(reason="make CI faster")
    def test_stacked_lstm_gpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=stacked_lstm_net,
                use_cuda=True,
                save_dirname="understand_sentiment_stacked_lstm.inference.model",
            )

    def test_stacked_lstm_gpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=stacked_lstm_net,
                use_cuda=True,
                parallel=True,
            )


if __name__ == '__main__':
    unittest.main()