未验证 提交 b958fa75 编写于 作者: K kangguangli 提交者: GitHub

[with_data_parallel][part3] remove with_data_parallel in unit test (#50568)

* remove with_data_parallel in unittest

* fix CI

* remove comment

* trigger CI

* revert part changes

* test_build_strategy_fusion_group_pass
上级 499b7f87
......@@ -235,9 +235,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
exe.run(startup_program)
if use_compiled:
main_program = paddle.static.CompiledProgram(
main_program
).with_data_parallel(fetch_vars[0].name, places=[self.place])
main_program = paddle.static.CompiledProgram(main_program)
if use_str: # test for fetch name
fetch_vars = [x.name for x in fetch_vars]
......
......@@ -32,7 +32,7 @@ class FusionGroupPaddingRNNTest(PaddingRNNTestBase):
rnn_model = "static"
config = RNNConfig("test", rnn_model)
with fluid.scope_guard(fluid.Scope()):
self.train(config, parallel=True, use_program_cache=False)
self.train(config, use_program_cache=False)
if __name__ == '__main__':
......
......@@ -72,30 +72,6 @@ class TestCompiledProgram(unittest.TestCase):
)
np.testing.assert_array_equal(loss_data[0], self.loss)
def test_compiled_program_with_data_parallel(self):
with new_program_scope():
paddle.seed(self.seed)
paddle.framework.random._manual_program_seed(self.seed)
place = (
fluid.CUDAPlace(0)
if core.is_compiled_with_cuda()
else fluid.CPUPlace()
)
exe = fluid.Executor(place)
loss = simple_fc_net()
exe.run(fluid.default_startup_program())
compiled_prog = fluid.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(loss_name=loss.name, places=[place])
(loss_data,) = exe.run(
compiled_prog,
feed={"image": self.img, "label": self.label},
fetch_list=[loss.name],
)
np.testing.assert_array_equal(loss_data[0], self.loss)
class TestCompiledProgramError(unittest.TestCase):
def test_program_or_graph_error(self):
......@@ -112,17 +88,6 @@ class TestCompiledProgramError(unittest.TestCase):
)
avg_loss = paddle.mean(loss)
def compile_program_not_compiled(self):
with fluid.program_guard(fluid.Program()):
# build model
self.build_simple_model()
# compile program
program = fluid.default_main_program()
compiled_program = fluid.CompiledProgram(
program
).with_data_parallel()
return compiled_program
def compile_program(self):
with fluid.program_guard(fluid.Program()):
# build model
......@@ -149,34 +114,6 @@ class TestCompiledProgramError(unittest.TestCase):
with self.assertRaises(ValueError):
compiled_program._compile(scope, new_place)
def test_share_vars_from_error_no_parallel(self):
with fluid.program_guard(fluid.Program()):
source_program, _, _ = self.compile_program()
self.build_simple_model()
# compile program
program = fluid.default_main_program()
compiled_program = fluid.CompiledProgram(
program
).with_data_parallel(share_vars_from=source_program)
scope = fluid.global_scope()
place = fluid.CPUPlace()
with self.assertRaises(ValueError):
compiled_program._compile(scope, place)
def test_share_vars_from_error_no_executor(self):
with fluid.program_guard(fluid.Program()):
source_program = self.compile_program_not_compiled()
self.build_simple_model()
# compile program
program = fluid.default_main_program()
compiled_program = fluid.CompiledProgram(
program
).with_data_parallel(share_vars_from=source_program)
scope = fluid.global_scope()
place = fluid.CPUPlace()
with self.assertRaises(ValueError):
compiled_program._compile(scope, place)
if __name__ == '__main__':
unittest.main()
......@@ -111,9 +111,7 @@ class TestCUDAGraphInStaticMode(unittest.TestCase):
build_strategy.fix_op_run_order = True
build_strategy.fuse_all_optimizer_ops = True
compiled_program = paddle.static.CompiledProgram(
main
).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy, places=place
main, build_strategy=build_strategy
)
image_t = scope.var(image.name).get_tensor()
label_t = scope.var(label.name).get_tensor()
......
......@@ -106,13 +106,13 @@ class DatasetLoaderTestBase(unittest.TestCase):
dataset._set_batch_size(BATCH_SIZE)
if isinstance(place, fluid.CPUPlace):
file_num = 10
file_num = 1
os.environ['CPU_NUM'] = str(file_num)
places = fluid.cpu_places()
places = [fluid.CPUPlace()]
use_cuda = False
else:
file_num = fluid.core.get_cuda_device_count()
places = fluid.cuda_places()
file_num = 1
places = [fluid.CUDAPlace(0)]
use_cuda = True
filelist = []
......@@ -145,7 +145,7 @@ class DatasetLoaderTestBase(unittest.TestCase):
dataloader = fluid.io.DataLoader.from_dataset(
dataset=dataset, places=places, drop_last=self.drop_last
)
prog = fluid.CompiledProgram(main_prog).with_data_parallel()
prog = fluid.CompiledProgram(main_prog)
exe = fluid.Executor(place)
exe.run(startup_prog)
......
......@@ -19,7 +19,6 @@ import numpy as np
os.environ['FLAGS_use_mkldnn'] = '0'
os.environ['CPU_NUM'] = '4'
import multiprocessing
import unittest
from functools import reduce
......@@ -82,13 +81,6 @@ class TestExecutor(unittest.TestCase):
with fluid.unique_name.guard():
self.executor_main()
for p in places:
self.place = p
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(fluid.Scope()):
with fluid.unique_name.guard():
self.pe_main()
def prepare_feed(self, image, label, dev_cnt=1):
batch_size = 32 * dev_cnt
image_shape = (batch_size,) + tuple(image.shape[1:])
......@@ -179,48 +171,6 @@ class TestExecutor(unittest.TestCase):
fluid.global_scope(), persistables, non_persistables
)
def pe_main(self):
image, label, loss = simple_fc_net()
loss.persistable = False
persistables, non_persistables = get_persistables_and_non_persistables(
fluid.default_main_program(), [loss.name]
)
self.assert_gc_vars(
fluid.default_main_program(), [loss.name], non_persistables
)
exe = fluid.Executor(self.place)
exe.run(fluid.default_startup_program())
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 100
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
prog = fluid.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(loss_name=loss.name, exec_strategy=exec_strategy)
dev_cnt = (
fluid.core.get_cuda_device_count()
if isinstance(self.place, fluid.CUDAPlace)
else int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
)
for idx in range(10):
image_np, label_np = self.prepare_feed(image, label, dev_cnt)
feed = {image.name: image_np, label.name: label_np}
exe.run(program=prog, feed=feed, fetch_list=[loss])
local_scopes = prog._local_scopes
for scope in local_scopes:
kids = scope._kids()
self.assertTrue(len(kids) == 1)
self.assertScopeVar(kids[0], persistables, non_persistables)
if __name__ == '__main__':
unittest.main()
......@@ -23,20 +23,13 @@ from fake_reader import fake_imdb_reader
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid import compiler
def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
def train(network, use_cuda, batch_size=32, pass_num=2):
if use_cuda and not core.is_compiled_with_cuda():
print('Skip use_cuda=True because Paddle is not compiled with cuda')
return
if use_parallel_executor and os.name == 'nt':
print(
'Skip use_parallel_executor=True because Paddle comes without parallel support on windows'
)
return
word_dict_size = 5147
reader = fake_imdb_reader(word_dict_size, batch_size * 40)
train_reader = paddle.batch(reader, batch_size=batch_size)
......@@ -54,9 +47,7 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
reader = feeder.decorate_reader(
train_reader, multi_devices=use_parallel_executor
)
reader = feeder.decorate_reader(train_reader, multi_devices=False)
exe = fluid.Executor(place)
fluid.default_startup_program().random_seed = 1
......@@ -64,13 +55,7 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
exe.run(fluid.default_startup_program())
train_cp = fluid.default_main_program()
if use_parallel_executor:
train_cp = compiler.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(loss_name=cost.name)
fetch_list = [cost.name]
else:
fetch_list = [cost]
fetch_list = [cost]
for pass_id in range(pass_num):
batch_id = 0
......@@ -94,12 +79,9 @@ class TestBase(unittest.TestCase):
return
for use_cuda in [True, False]:
for use_parallel_executor in [False, True]:
print(
'network: {}, use_cuda: {}, use_parallel_executor: {}'.format(
self.net.__name__, use_cuda, use_parallel_executor
)
)
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(core.Scope()):
train(self.net, use_cuda, use_parallel_executor)
print(
'network: {}, use_cuda: {}'.format(self.net.__name__, use_cuda)
)
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(core.Scope()):
train(self.net, use_cuda)
......@@ -473,7 +473,7 @@ class PaddingRNNTestBase(unittest.TestCase):
# You can override the function to set your own config.
pass
def _prepare_program(self, config, parallel=True):
def _prepare_program(self, config):
paddle.seed(config.random_seed)
self.main_program = fluid.Program()
self.startup_program = fluid.Program()
......@@ -517,16 +517,7 @@ class PaddingRNNTestBase(unittest.TestCase):
self.exe.run(self.startup_program)
if parallel:
self.train_program = fluid.compiler.CompiledProgram(
self.main_program
).with_data_parallel(
loss_name=self.loss.name,
build_strategy=self.build_strategy,
exec_strategy=self.exec_strategy,
)
else:
self.train_program = self.main_program
self.train_program = self.main_program
def _generate_init_data(self):
init_hidden = np.zeros(
......@@ -621,29 +612,27 @@ class PaddingRNNTestBase(unittest.TestCase):
ppl = np.append(ppl, batch_ppl)
return ppl
def train(self, config, parallel=True, use_program_cache=True):
def train(self, config, use_program_cache=True):
self.set_customed_config()
self.config = config
self._prepare_program(config, parallel)
self._prepare_program(config)
ppl = np.zeros(shape=(0, config.batch_size))
for epoch_id in range(config.max_epoch):
train_ppl = self._train_an_epoch(epoch_id, use_program_cache)
ppl = np.append(ppl, train_ppl)
return ppl
def compare_padding_static_mode(
self, parallel=True, use_program_cache=True
):
def compare_padding_static_mode(self, use_program_cache=True):
'''
Test that train ppl of padding mode is same to that of static graph mode
'''
config = RNNConfig('test', 'padding')
with fluid.scope_guard(fluid.Scope()):
padding_rnn_ppl = self.train(config, parallel, use_program_cache)
padding_rnn_ppl = self.train(config, use_program_cache)
config = RNNConfig('test', 'static')
with fluid.scope_guard(fluid.Scope()):
static_rnn_ppl = self.train(config, parallel, use_program_cache)
static_rnn_ppl = self.train(config, use_program_cache)
np.testing.assert_allclose(padding_rnn_ppl, static_rnn_ppl, rtol=0.001)
......@@ -654,7 +643,7 @@ class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):
'''
fluid.core._set_eager_deletion_mode(-1.0, 1.0, True)
# The parallel option has been removed; use_program_cache is the only remaining switch.
self.compare_padding_static_mode(parallel=True, use_program_cache=True)
self.compare_padding_static_mode(use_program_cache=True)
def test_padding_mode_eager_deletion(self):
'''
......@@ -662,7 +651,7 @@ class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):
'''
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
# The parallel option has been removed; use_program_cache is the only remaining switch.
self.compare_padding_static_mode(parallel=True, use_program_cache=True)
self.compare_padding_static_mode(use_program_cache=True)
if __name__ == '__main__':
......
......@@ -16,14 +16,12 @@ import os
os.environ['CPU_NUM'] = '2'
import multiprocessing
import unittest
import numpy
import paddle
import paddle.fluid as fluid
import paddle.fluid.compiler as compiler
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid.executor import Executor
......@@ -41,30 +39,19 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
places.append(core.CUDAPlace(0))
for p in places:
for with_data_parallel in [False, True]:
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(fluid.Scope()):
self.run_main(p, with_data_parallel)
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(fluid.Scope()):
self.run_main(p)
def run_main(self, place, with_data_parallel):
def run_main(self, place):
self.place = place
self.with_data_parallel = with_data_parallel
if not core.is_compiled_with_cuda() and isinstance(
self.place, core.CUDAPlace
):
return
if isinstance(self.place, core.CUDAPlace):
device_cnt = (
core.get_cuda_device_count() if self.with_data_parallel else 1
)
else:
device_cnt = (
int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
if self.with_data_parallel
else 1
)
device_cnt = 1
d0 = paddle.static.data("d0", shape=[-1, 10], dtype='float32')
d1 = paddle.static.data("d1", shape=[-1, 10], dtype='float32')
......@@ -139,19 +126,12 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
exe.run(fluid.default_startup_program())
prog = fluid.default_main_program()
if self.with_data_parallel:
prog = compiler.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(loss_name=loss.name)
for _ in range(5):
d = []
for i in range(3):
tmp = numpy.random.random(size=[10]).astype('float32')
if not self.with_data_parallel:
d.append(tmp)
else:
d.append(numpy.array([tmp] * device_cnt))
d.append(numpy.array([tmp] * device_cnt))
outs = exe.run(
program=prog,
......
......@@ -83,11 +83,11 @@ class TestExecutor(unittest.TestCase):
class ExecutorPaddingRNNTest(PaddingRNNTestBase):
def train_and_save_inference_program(
self, rnn_model="static", parallel=True, use_program_cache=True
self, rnn_model="static", use_program_cache=True
):
config = RNNConfig("test", rnn_model)
with fluid.scope_guard(fluid.Scope()):
self.train(config, parallel, use_program_cache)
self.train(config, use_program_cache)
fluid.io.save_inference_model(
main_program=self.main_program,
feeded_var_names=self.feed_order,
......@@ -101,7 +101,7 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase):
for rnn_model in ["static", "padding"]:
# The parallel option has been removed; training always runs the plain main program.
self.train_and_save_inference_program(
rnn_model=rnn_model, parallel=True, use_program_cache=True
rnn_model=rnn_model, use_program_cache=True
)
x_np = np.random.random(
......
......@@ -64,9 +64,7 @@ class TestExecutor(unittest.TestCase):
exe = fluid.Executor(cpu)
lr, cost = self.net()
exe.run(startup_program)
compiled_prog = fluid.CompiledProgram(
main_program
).with_data_parallel(loss_name=cost.name)
compiled_prog = fluid.CompiledProgram(main_program)
train_data = [[1.0], [2.0], [3.0], [4.0]]
y_true = [[2.0], [4.0], [6.0], [8.0]]
a = 0
......
......@@ -119,9 +119,7 @@ class TestExecutor(unittest.TestCase):
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)
exe.run(startup_program)
compiled_prog = fluid.CompiledProgram(
main_program
).with_data_parallel(loss_name=cost.name)
compiled_prog = fluid.CompiledProgram(main_program)
train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype(
'float32'
)
......
......@@ -72,28 +72,20 @@ class TestExecutorReturnTensorNotOverOverwritingWithLayers(unittest.TestCase):
def setUp(self):
pass
def calc_add_out(self, place=None, parallel=None):
def calc_add_out(self, place=None):
x = paddle.ones(shape=[3, 3], dtype='float32')
y = paddle.ones(shape=[3, 3], dtype='float32')
out = paddle.add(x=x, y=y)
program = fluid.default_main_program()
if parallel:
program = fluid.CompiledProgram(program).with_data_parallel(
places=place
)
exe = fluid.Executor(place)
out = exe.run(program, fetch_list=[out], return_numpy=False)
return out
def calc_sub_out(self, place=None, parallel=None):
def calc_sub_out(self, place=None):
x = paddle.ones(shape=[2, 2], dtype='float32')
y = paddle.ones(shape=[2, 2], dtype='float32')
out = paddle.subtract(x=x, y=y)
program = fluid.default_main_program()
if parallel:
program = fluid.CompiledProgram(program).with_data_parallel(
places=place
)
exe = fluid.Executor(place)
out = exe.run(program, fetch_list=[out], return_numpy=False)
return out
......@@ -104,12 +96,11 @@ class TestExecutorReturnTensorNotOverOverwritingWithLayers(unittest.TestCase):
places.append(fluid.CUDAPlace(0))
for place in places:
for parallel in [True, False]:
add_out = self.calc_add_out(place, parallel)
add_out1 = np.array(add_out[0])
sub_out = self.calc_sub_out(place, parallel)
add_out2 = np.array(add_out[0])
np.testing.assert_array_equal(add_out1, add_out2)
add_out = self.calc_add_out(place)
add_out1 = np.array(add_out[0])
sub_out = self.calc_sub_out(place)
add_out2 = np.array(add_out[0])
np.testing.assert_array_equal(add_out1, add_out2)
if __name__ == '__main__':
......
......@@ -20,7 +20,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.compiler as compiler
import paddle.fluid.core as core
os.environ['CPU_NUM'] = str(4)
......@@ -46,16 +45,12 @@ class TestFeedData(unittest.TestCase):
else int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
)
def _get_feed_batch_size(self, use_cuda, use_parallel_executor):
def _get_feed_batch_size(self, use_cuda):
"""
Returns the actual fed data size. ParallelExecutor is no longer used here,
so this is simply the configured data batch size.
"""
return (
self.data_batch_size * self._get_device_count(use_cuda)
if use_parallel_executor
else self.data_batch_size
)
return self.data_batch_size
def _simple_fc_net(self, in_size, label_size, class_num, hidden_sizes):
in_data = fluid.data(name="data", dtype='float32', shape=in_size)
......@@ -85,57 +80,45 @@ class TestFeedData(unittest.TestCase):
for use_cuda in (
[True, False] if core.is_compiled_with_cuda() else [False]
):
for use_parallel_executor in [False, True]:
print('Test Parameters:'),
print(
{
'use_cuda': use_cuda,
'use_parallel_executor': use_parallel_executor,
}
)
# Test feeding without error
self._test_feed_data_match_shape_type(
use_cuda, use_parallel_executor
)
self._test_feed_data_contains_neg_one(
use_cuda, use_parallel_executor
)
self._test_feed_lod_tensor(use_cuda, use_parallel_executor)
# Test exception message when feeding with error
in_shape_tuple = (-1, 3, 4, 8)
error_shape_list = [self.data_batch_size, 3, 4, 5]
with self.assertRaises(ValueError) as shape_mismatch_err:
self._test_feed_data_shape_mismatch(
use_cuda, use_parallel_executor
)
self.assertEqual(
str(shape_mismatch_err.exception),
"The fed Variable %r should have dimensions = %r, "
"shape = %r, but received fed shape %r on each device"
% (
'data',
len(in_shape_tuple),
in_shape_tuple,
error_shape_list,
),
)
with self.assertRaises(ValueError) as dtype_mismatch_err:
self._test_feed_data_dtype_mismatch(
use_cuda, use_parallel_executor
)
self.assertEqual(
str(dtype_mismatch_err.exception),
"The data type of fed Variable %r must be 'int64', but "
"received 'float64'" % ('label'),
)
def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor):
feed_batch_size = self._get_feed_batch_size(
use_cuda, use_parallel_executor
)
print('Test Parameters:'),
print(
{
'use_cuda': use_cuda,
}
)
# Test feeding without error
self._test_feed_data_match_shape_type(use_cuda)
self._test_feed_data_contains_neg_one(use_cuda)
self._test_feed_lod_tensor(use_cuda)
# Test exception message when feeding with error
in_shape_tuple = (-1, 3, 4, 8)
error_shape_list = [self.data_batch_size, 3, 4, 5]
with self.assertRaises(ValueError) as shape_mismatch_err:
self._test_feed_data_shape_mismatch(use_cuda)
self.assertEqual(
str(shape_mismatch_err.exception),
"The fed Variable %r should have dimensions = %r, "
"shape = %r, but received fed shape %r on each device"
% (
'data',
len(in_shape_tuple),
in_shape_tuple,
error_shape_list,
),
)
with self.assertRaises(ValueError) as dtype_mismatch_err:
self._test_feed_data_dtype_mismatch(use_cuda)
self.assertEqual(
str(dtype_mismatch_err.exception),
"The data type of fed Variable %r must be 'int64', but "
"received 'float64'" % ('label'),
)
def _test_feed_data_dtype_mismatch(self, use_cuda):
feed_batch_size = self._get_feed_batch_size(use_cuda)
in_size = [self.data_batch_size, 3, 4, 5]
feed_in_data = np.random.uniform(
size=[feed_batch_size, 3, 4, 5]
......@@ -150,11 +133,10 @@ class TestFeedData(unittest.TestCase):
feed_in_data,
feed_label,
use_cuda,
use_parallel_executor,
)
def _test_feed_data_shape_mismatch(self, use_cuda, use_parallel_executor):
batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor)
def _test_feed_data_shape_mismatch(self, use_cuda):
batch_size = self._get_feed_batch_size(use_cuda)
in_size = [None, 3, 4, 8]
feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype(
np.float32
......@@ -169,11 +151,10 @@ class TestFeedData(unittest.TestCase):
feed_in_data,
feed_label,
use_cuda,
use_parallel_executor,
)
def _test_feed_data_contains_neg_one(self, use_cuda, use_parallel_executor):
batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor)
def _test_feed_data_contains_neg_one(self, use_cuda):
batch_size = self._get_feed_batch_size(use_cuda)
in_size = [-1, 3, 4, 5]
feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype(
np.float32
......@@ -188,13 +169,10 @@ class TestFeedData(unittest.TestCase):
feed_in_data,
feed_label,
use_cuda,
use_parallel_executor,
)
def _test_feed_data_match_shape_type(self, use_cuda, use_parallel_executor):
feed_batch_size = self._get_feed_batch_size(
use_cuda, use_parallel_executor
)
def _test_feed_data_match_shape_type(self, use_cuda):
feed_batch_size = self._get_feed_batch_size(use_cuda)
in_size = [self.data_batch_size, 3, 4, 5]
feed_in_data = np.random.uniform(
size=[feed_batch_size, 3, 4, 5]
......@@ -209,10 +187,9 @@ class TestFeedData(unittest.TestCase):
feed_in_data,
feed_label,
use_cuda,
use_parallel_executor,
)
def _test_feed_lod_tensor(self, use_cuda, use_parallel_executor):
def _test_feed_lod_tensor(self, use_cuda):
device_count = self._get_device_count(use_cuda)
in_size = [device_count, 3, 4, 5]
......@@ -241,7 +218,6 @@ class TestFeedData(unittest.TestCase):
feed_data_tensor,
feed_label_tensor,
use_cuda,
use_parallel_executor,
)
def _feed_data_in_executor(
......@@ -251,7 +227,6 @@ class TestFeedData(unittest.TestCase):
feed_in_data,
feed_label,
use_cuda,
use_parallel_executor,
):
startup_program = fluid.Program()
......@@ -268,10 +243,6 @@ class TestFeedData(unittest.TestCase):
exe.run(startup_program)
train_program = main_program
if use_parallel_executor:
train_program = compiler.CompiledProgram(
main_program
).with_data_parallel(loss_name=loss.name)
for i in range(self.iterations):
fetches = exe.run(
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
......@@ -44,7 +43,6 @@ class TestFetchLoDTensorArray(unittest.TestCase):
return loss, array
def check_network(self, use_cuda=True):
os.environ["CPU_NUM"] = str(2)
main_program = fluid.Program()
startup_program = fluid.Program()
......@@ -60,35 +58,15 @@ class TestFetchLoDTensorArray(unittest.TestCase):
feed_dict = {'image': image, 'label': label}
build_strategy = fluid.BuildStrategy()
binary = fluid.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
binary = fluid.CompiledProgram(
main_program, build_strategy=build_strategy
)
device_num = fluid.core.get_cuda_device_count() if use_cuda else 2
for _ in range(3):
loss_v, array_v = exe.run(
binary,
feed=feed_dict,
fetch_list=[loss, array],
return_merged=False,
binary, feed=feed_dict, fetch_list=[loss, array]
)
self.assertEqual(np.array(loss_v).shape, (device_num, 1))
self.assertEqual(
np.array(array_v[0][0]).shape, (batch_size / device_num, 784)
)
self.assertEqual(
np.array(array_v[0][1]).shape, (batch_size / device_num, 1)
)
self.assertEqual(np.array(array_v[0][2]).shape, (1,))
for _ in range(3):
loss_v, array_v = exe.run(
binary,
feed=feed_dict,
fetch_list=[loss, array],
return_merged=True,
)
self.assertEqual(np.array(loss_v).shape, (device_num,))
self.assertEqual(np.array(loss_v).shape, (1,))
self.assertEqual(np.array(array_v[0]).shape, (batch_size, 784))
self.assertEqual(np.array(array_v[1]).shape, (batch_size, 1))
np.testing.assert_allclose(loss_v, array_v[2], rtol=1e-05)
......@@ -98,13 +76,6 @@ class TestFetchLoDTensorArray(unittest.TestCase):
self.check_network(use_cuda=True)
self.check_network(use_cuda=False)
def test_fetch_unmerged_parallel_graph(self):
fluid.core.globals()['FLAGS_enable_parallel_graph'] = True
if fluid.core.is_compiled_with_cuda():
self.check_network(use_cuda=True)
self.check_network(use_cuda=False)
fluid.core.globals()['FLAGS_enable_parallel_graph'] = False
if __name__ == '__main__':
unittest.main()
......@@ -83,8 +83,8 @@ class TestFuseBatchNormActPass(unittest.TestCase):
# disable fuse_bn_act_ops
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_bn_act_ops = False
binary = fluid.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
binary = fluid.CompiledProgram(
main_program, build_strategy=build_strategy
)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
......@@ -103,8 +103,8 @@ class TestFuseBatchNormActPass(unittest.TestCase):
# enable fuse_bn_act_ops
build_strategy_fused = fluid.BuildStrategy()
build_strategy_fused.fuse_bn_act_ops = True
binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy_fused
binary_fused = fluid.CompiledProgram(
main_program, build_strategy=build_strategy_fused
)
train_reader_fused = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
......
......@@ -198,8 +198,8 @@ class TestFusedBnAddActAPI(unittest.TestCase):
)
build_strategy_fused = fluid.BuildStrategy()
build_strategy_fused.fuse_bn_add_act_ops = True
binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy_fused
binary_fused = fluid.CompiledProgram(
main_program, build_strategy=build_strategy_fused
)
exe = fluid.Executor(place)
loss_vals_fused = []
......@@ -221,8 +221,8 @@ class TestFusedBnAddActAPI(unittest.TestCase):
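# build_origin_program: turn off fuse_bn_add_act_ops
# NOTE(review): the CompiledProgram below is given build_strategy_fused rather than
# build_strategy -- confirm which strategy is intended for the origin program.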
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_bn_add_act_ops = False
binary = fluid.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy_fused
binary = fluid.CompiledProgram(
main_program, build_strategy=build_strategy_fused
)
loss_vals = []
scope = fluid.Scope()
......
......@@ -146,11 +146,8 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase):
def _test_output(self):
build_strategy = paddle.static.BuildStrategy()
build_strategy.fuse_gemm_epilogue = True
program = paddle.static.CompiledProgram(self.main_prog)
program = program.with_data_parallel(
loss_name=self.loss.name,
build_strategy=build_strategy,
places=paddle.static.cuda_places(),
program = paddle.static.CompiledProgram(
self.main_prog, build_strategy=build_strategy
)
result = self.exe.run(
......@@ -332,11 +329,8 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase):
def _test_output(self):
build_strategy = paddle.static.BuildStrategy()
build_strategy.fuse_gemm_epilogue = True
program = paddle.static.CompiledProgram(self.main_prog)
program = program.with_data_parallel(
loss_name=self.loss.name,
build_strategy=build_strategy,
places=paddle.static.cuda_places(),
program = paddle.static.CompiledProgram(
self.main_prog, build_strategy=build_strategy
)
outs_res = self.exe.run(program, feed=self.feed, fetch_list=self.fetch)
......
......@@ -238,9 +238,7 @@ class TestInstance(unittest.TestCase):
# will print warning message
cp_prog = CompiledProgram(program).with_data_parallel(
loss_name=avg_cost.name
)
cp_prog = CompiledProgram(program)
save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, cp_prog)
self.assertRaises(
......
......@@ -97,9 +97,7 @@ class TestInplaceAddto(unittest.TestCase):
strategy = fluid.BuildStrategy()
strategy.enable_addto = enable_addto
compiled = fluid.CompiledProgram(main).with_data_parallel(
loss_name=loss.name, build_strategy=strategy
)
compiled = fluid.CompiledProgram(main, build_strategy=strategy)
exe.run(startup)
img = np.random.uniform(-128, 128, [8, 3, 224, 224]).astype(
......
......@@ -64,9 +64,7 @@ class TestSoftmaxWithXe(unittest.TestCase):
build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = inplace
prog = fluid.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(
build_strategy=build_strategy, places=place
fluid.default_main_program(), build_strategy=build_strategy
)
fetch_list = [z_d.name, s_d.name]
......
......@@ -41,8 +41,8 @@ class TestMemoryReuseExcludeFeedVar(unittest.TestCase):
exe.run(fluid.default_startup_program())
compiled_prog = fluid.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
fluid.default_main_program(), build_strategy=build_strategy
)
image_tensor = fluid.LoDTensor()
np_image = np.random.uniform(
......
......@@ -68,9 +68,7 @@ class TestReaderReset(unittest.TestCase):
paddle.batch(self.prepare_data(), batch_size=self.batch_size)
)
train_cp = compiler.CompiledProgram(main_prog).with_data_parallel(
places=[place]
)
train_cp = compiler.CompiledProgram(main_prog)
batch_id = 0
pass_count = 0
......
......@@ -90,9 +90,7 @@ class TestResnet50Accuracy(unittest.TestCase):
loss = self.build_program(main_program, startup_program)
exe = paddle.static.Executor(place)
compiled_prog = paddle.static.CompiledProgram(
main_program
).with_data_parallel(loss_name=loss.name)
compiled_prog = paddle.static.CompiledProgram(main_program)
loss_vals = []
scope = paddle.static.Scope()
......
......@@ -126,11 +126,7 @@ class TestWeightDecay(unittest.TestCase):
build_strategy.memory_optimize = use_ir_memory_optimize
train_cp = compiler.CompiledProgram(
fluid.default_main_program()
).with_data_parallel(
loss_name=loss.name,
exec_strategy=exec_strategy,
build_strategy=build_strategy,
fluid.default_main_program(), build_strategy=build_strategy
)
loss_set = []
......
......@@ -76,11 +76,11 @@ class TestGraph(unittest.TestCase):
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
origin_binary = paddle.static.CompiledProgram(
graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
graph.graph, build_strategy=build_strategy
)
backup_binary = paddle.static.CompiledProgram(
backup_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
backup_graph.graph, build_strategy=build_strategy
)
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup)
......
......@@ -70,9 +70,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
exe = paddle.static.Executor(place)
exe.run(startup_program)
binary = paddle.static.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name
)
binary = paddle.static.CompiledProgram(main_program)
img, label = init_data()
feed_dict = {"image": img, "label": label}
......
......@@ -143,8 +143,8 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase):
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = paddle.static.CompiledProgram(
main_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
main_graph.graph, build_strategy=build_strategy
)
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
......
......@@ -373,8 +373,8 @@ class TestQuantizationFreezePass(unittest.TestCase):
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = paddle.static.CompiledProgram(
main_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
main_graph.graph, build_strategy=build_strategy
)
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
......
......@@ -143,8 +143,8 @@ class TestQuantizationScalePass(unittest.TestCase):
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = paddle.static.CompiledProgram(
main_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
main_graph.graph, build_strategy=build_strategy
)
iters = 5
batch_size = 8
......
......@@ -191,8 +191,8 @@ class TestUserDefinedQuantization(unittest.TestCase):
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = paddle.static.CompiledProgram(
main_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
main_graph.graph, build_strategy=build_strategy
)
iters = 5
batch_size = 8
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册