# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# set this allocator flag before the paddle modules below are imported
os.environ['FLAGS_use_stream_safe_cuda_allocator'] = "true"
import json
import shutil
import sys
import unittest

import numpy as np

import paddle
from paddle.fluid import core, framework
from paddle.fluid.core import StandaloneExecutor
from paddle.profiler import profiler

paddle.enable_static()


class TestDryRun(unittest.TestCase):
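    # Checks StandaloneExecutor.dry_run, which returns per-program cost
    # information (e.g. device memory usage) rather than fetch results.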
    def setUp(self):
        place = (
            paddle.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else paddle.CPUPlace()
        )
        self.place = core.Place()
        self.place.set_place(place)

    def build_program(self):
        startup_program = paddle.static.Program()
        main_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            a = paddle.static.data(name="a", shape=[2, 2], dtype='float32')
            b = paddle.ones([2, 2]) * 2
            t = paddle.static.nn.fc(a, 2)
            c = t + b

        return startup_program, main_program, c

    def test_dry_run(self):
        scope = core.Scope()
        startup_program, main_program, c = self.build_program()
        exe = paddle.static.Executor(self.place)
        exe.run(startup_program, scope=scope)

        standaloneexecutor = StandaloneExecutor(self.place, main_program.desc)
        # test for cost_info
        cost_info = standaloneexecutor.dry_run(
            scope, {"a": np.ones([2, 2], dtype="float32")}
        )
        self.check_cost_info(cost_info)

    def check_cost_info(self, cost_info):
        IS_WINDOWS = sys.platform.startswith('win')

        if core.is_compiled_with_cuda():
            # w, bias, b, out: each memory block is at least 256 bytes on Linux
            gt = 16 * 4 if IS_WINDOWS else 256 * 4
            self.assertGreater(cost_info.device_memory_bytes(), gt)
        else:
            self.assertEqual(cost_info.device_memory_bytes(), 0)


def build_program():
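    # Alternates device_guard('cpu') and device_guard('gpu') so the executor
    # must insert memcpy_h2d / memcpy_d2h ops between the two devices.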
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()

    with paddle.static.program_guard(main_program, startup_program):
        with paddle.static.device_guard('cpu'):
            data = paddle.ones([4, 64], dtype='float32', name='data')

        # data -> [memcpy_h2d] -> data' -> [matmul] -> matmul_out -> [add] -> add_out
        with paddle.static.device_guard('gpu'):
            weight = paddle.randn([64, 64], name='weight')  # gpu
            matmul_out = paddle.matmul(data, weight, name='matmul_out')  # gpu
            bias = paddle.ones([4, 64], dtype='float32', name='bias')
            add_out = paddle.add(matmul_out, bias, name='add_out')

        # add_out -> [memcpy_d2h] -> add_out' -> [sub] -> sub_out -> [tanh] -> tanh_out
        with paddle.static.device_guard('cpu'):
            sub_out = paddle.subtract(add_out, data, name='sub_out')
            tanh_out = paddle.tanh(sub_out, name='tanh_out')

        with paddle.static.device_guard('gpu'):
            bias_1 = paddle.add(bias, sub_out, name='bias_1')
            out_before = paddle.tanh(bias_1, name='out_before')
            out_last = paddle.subtract(tanh_out, data, name='out_last')

            out = paddle.add(out_before, out_last, name='out')
            mean = paddle.mean(out, name='mean_out')

    return main_program, startup_program, [mean]


class ExecutorStatisticsTestCase(unittest.TestCase):
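    # Runs the same program under ParallelExecutor, Executor and
    # StandaloneExecutor and inspects the perfstat file each one produces
    # (currently skipped; see run_with_statistics).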
    def setUp(self):
        self.iter_n = 3
        self.place = (
            paddle.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else paddle.CPUPlace()
        )
        self.perf_path = './perfstat'

    def test_parallel_executor_statistics(self):
        self.run_with_statistics(executor='ParallelExecutor')

    def test_executor_statistics(self):
        self.run_with_statistics(executor='Executor')

    def test_standalone_executor_statistics(self):
        self.run_with_statistics(executor='StandaloneExecutor')

    def run_with_statistics(self, executor=None):
        # fails randomly; skip this test case
        return
        if os.getenv("FLAGS_static_executor_perfstat_filepath") is None:
            return
        paddle.seed(2020)
        # note: startup program is empty
        main_program, startup_program, fetch_list = build_program()

        enable = True
        if executor == 'ParallelExecutor':
            main_program = paddle.fluid.compiler.CompiledProgram(main_program)
            enable = False
        elif executor == 'Executor':
            enable = False

        scope = paddle.static.Scope()
        with paddle.static.scope_guard(scope):
            with framework._enable_standalone_executor(enable):
                exe = paddle.static.Executor(self.place)
                helper_profiler = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2)
                )
                helper_profiler.start()
                for i in range(self.iter_n):
                    exe.run(main_program, fetch_list=fetch_list)
                    helper_profiler.step()
                helper_profiler.stop()

        self.assertTrue(os.path.exists(self.perf_path))
        with open(self.perf_path, 'r') as load_f:
            stat_res = json.load(load_f)
            self.assertTrue(len(stat_res) > 0)

        os.remove(self.perf_path)
        shutil.rmtree('./profiler_log')


class MultiStreamModelTestCase(unittest.TestCase):
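    # Compares fetch results of the cpu/gpu program with and without the
    # standalone executor; the outputs must match exactly.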
    def setUp(self):
        self.iter_n = 2
        self.place = (
            paddle.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else paddle.CPUPlace()
        )

    def test_result(self):
        ground_truths = self.run_test(False)
        res = self.run_test(True)

        for gt, out in zip(ground_truths, res):
            self.assertEqual(gt[0], out[0])

    def run_test(self, use_new_executor=True):
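        # run the program for iter_n steps with either the old executor or
        # the standalone executor, and collect the fetched results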
        paddle.seed(2020)
        main_program, startup_program, fetch_list = build_program()

        with framework._enable_standalone_executor(use_new_executor):
            scope = core.Scope()
            exe = paddle.static.Executor(self.place)
            outs = []
            for i in range(self.iter_n):
                outs.append(
                    exe.run(main_program, scope=scope, fetch_list=fetch_list)
                )
            print(outs)
        return outs


class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
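    # Feeds data through both the old and the standalone executor, covering
    # fetch by name, wrong fetch types, compiled programs and empty programs.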
    def setUp(self):
        self.place = (
            paddle.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else paddle.CPUPlace()
        )
        self.iter_run = 2

    def build_program(self, is_double=False):
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            a = paddle.static.data(name="a", shape=[2, 2], dtype='float32')
            b = paddle.ones([2, 2]) * 2
            t = paddle.static.nn.fc(a, 2)
            c = t + b
            if is_double:
                c = c + c

        return main_program, startup_program, [c]

    def _run(
        self,
        feed,
        use_str=False,
        is_double=False,
        add_wrong_fetch=False,
        use_compiled=False,
    ):
        paddle.seed(2020)

        main_program, startup_program, fetch_vars = self.build_program(
            is_double
        )

        exe = paddle.static.Executor(self.place)
        exe.run(startup_program)

        if use_compiled:
            main_program = paddle.static.CompiledProgram(main_program)

        if use_str:  # test for fetch name
            fetch_vars = [x.name for x in fetch_vars]
        if add_wrong_fetch:  # test for wrong fetch type
            fetch_vars.append(1123)
        outs = []
        for i in range(self.iter_run):
            out = exe.run(main_program, feed=feed, fetch_list=fetch_vars)[0]

            outs.append(out)

        return outs

    def run_raw_executor(self, feed, use_compiled=False):
        with framework._enable_standalone_executor(False):
            # build and run program 1
            out1 = self._run(
                feed, use_str=False, is_double=False, use_compiled=use_compiled
            )
            # build and run program 2 with the same executor
            out2 = self._run(
                feed, use_str=True, is_double=True, use_compiled=use_compiled
            )

            return [out1, out2]

    def run_new_executor(self, feed, use_compiled=False):
        with framework._enable_standalone_executor():
            out = self.run_raw_executor(feed, use_compiled=use_compiled)
        return out

    def test_with_feed(self):
        data = np.ones([2, 2], dtype="float32")
        feed = {"a": data, 'fake_input': data}

        res = self.run_new_executor(feed)
        gt = self.run_raw_executor(feed)
        for x, y in zip(gt, res):
            np.testing.assert_array_equal(x, y)

    def test_with_error(self):
        feed = [{'a': np.ones([2, 2], dtype="float32")}]

        with self.assertRaises(TypeError):
            with framework._enable_standalone_executor():
                self._run(feed[0], add_wrong_fetch=True)

    def test_empty_program(self):
        program = paddle.static.Program()
        exe = paddle.static.Executor(self.place)
        for i in range(10):
            out = exe.run()  # old executor

        for i in range(10):
            print(i, flush=True)
            with framework._enable_standalone_executor():
                out = exe.run(program, feed=None)


class TestException(unittest.TestCase):
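    # Error paths of the executor: out-of-range embedding ids, NaN inputs
    # under FLAGS_check_nan_inf, and cleanup of temporary fetch variables.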
    def setUp(self):
        self.place = paddle.CPUPlace()
        self.fetch_vars = None

    def build_program(self):
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            w = paddle.rand([10, 3])
            ids = paddle.static.data(name="id", shape=[5], dtype='int64')
            data = paddle.static.data(name="data", shape=[3], dtype='float32')
            emb = paddle.nn.functional.embedding(
                x=ids, weight=w, sparse=False, name="embedding"
            )
            emb = emb + data

        return main_program, startup_program, emb

    def _run(self, feeds):
        paddle.seed(2020)

        main_program, startup_program, fetch_vars = self.build_program()

        exe = paddle.static.Executor(self.place)
        exe.run(startup_program)

        for feed in feeds:
            out = exe.run(main_program, feed=feed, fetch_list=fetch_vars)
        self.fetch_vars = fetch_vars
        return out

    def run_new_executor(self, feed):
        with framework._enable_standalone_executor():
            out = self._run(feed)
        return out

    def test_exception(self):
        feed = [
            {
                'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
                'data': np.array([1, 2, 3]).astype(np.float32),
            },
            {
                'id': np.array([1, 2, 3, 4, 11]).astype(np.int64),
                'data': np.array([1, 2, 3]).astype(np.float32),
            },
        ]
        self.assertRaises(ValueError, self.run_new_executor, feed)

    def test_nan(self):
        flags = {'FLAGS_check_nan_inf': True, 'FLAGS_benchmark': True}
        paddle.fluid.set_flags(flags)
        feed = [
            {
                'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
                'data': np.array([1, 2, 3]).astype(np.float32),
            },
            {
                'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
                'data': np.array([1, 2, 3]).astype(np.float32),
            },
        ]
        feed[1]['data'][0] = np.nan
        self.assertRaises(RuntimeError, self.run_new_executor, feed)

    def test_scope_find_temp_var(self):
        feed = [
            {
                'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
                'data': np.array([1, 2, 3]).astype(np.float32),
            },
            {
                'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
                'data': np.array([2, 2, 2]).astype(np.float32),
            },
        ]
        self.run_new_executor(feed)
        self.assertIsNone(
            paddle.static.global_scope().find_var(self.fetch_vars.name)
        )


class TestFetchEmptyTensor(unittest.TestCase):
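    # Fetching a tensor of shape [3, 0] must preserve the zero-size shape.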
    def test_fetch(self):
        places = [paddle.CPUPlace()]
        if paddle.fluid.core.is_compiled_with_cuda():
            places.append(paddle.CUDAPlace(0))
        for place in places:
            with paddle.static.program_guard(paddle.static.Program()):
                out = paddle.empty([3, 0])
                exe = paddle.static.Executor(place)
                res = exe.run(fetch_list=[out])
            self.assertEqual(res[0].shape, (3, 0))


class TestInplaceApiWithDataTransform(unittest.TestCase):
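    # x is filled on GPU but incremented under a CPU device_guard, so the
    # executor has to transform data across devices around the inplace op.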
    def test_increment(self):
        if paddle.fluid.core.is_compiled_with_cuda():
            with paddle.fluid.device_guard("gpu:0"):
                x = paddle.tensor.fill_constant([1], "float32", 0)
            with paddle.fluid.device_guard("cpu"):
                x = paddle.increment(x)
            exe = paddle.static.Executor(paddle.CUDAPlace(0))
            with framework._enable_standalone_executor():

                for i in range(10):
                    (a,) = exe.run(
                        paddle.static.default_main_program(), fetch_list=[x]
                    )
                    self.assertEqual(a[0], 1)


if __name__ == "__main__":
    unittest.main()