Unverified commit 15de2dff, authored by dzhwinter and committed by GitHub

Merge pull request #15926 from dzhwinter/test/add_ir_mem_opt_tests

add ir memory optimize test base
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import unittest
import time
import math
import multiprocessing

import numpy as np

import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler
# Enable eager deletion mode.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['FLAGS_fast_eager_deletion_mode'] = 'true'
os.environ['CPU_NUM'] = '2'
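# With the 0.0 GB threshold, FLAGS_eager_delete_tensor_gb frees each tensor as
# soon as it is no longer referenced, and FLAGS_fast_eager_deletion_mode picks
# the faster deletion path, so these tests exercise garbage collection as
# aggressively as possible.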


class BuildIrMemOptBase(unittest.TestCase):
    def check_network_convergence(self,
                                  network,
                                  use_cuda=True,
                                  memory_opt=True,
                                  use_ir_memory_optimize=True,
                                  enable_inplace=True,
                                  iter=5):
        if use_cuda and not core.is_compiled_with_cuda():
            print('Skip use_cuda=True because Paddle is not compiled with CUDA')
            return

        if os.name == 'nt':
            print('Skip use_parallel_executor=True because Paddle does not '
                  'support parallel execution on Windows')
            return

        fluid.default_startup_program().random_seed = 100
        fluid.default_main_program().random_seed = 100
        batch_size = 32
        batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        # Build the network on the IMDB dataset.
        word_dict = paddle.dataset.imdb.word_dict()
        train_reader = paddle.batch(
            paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
        data = fluid.layers.data(
            name="words", shape=[1], dtype="int64", lod_level=1)
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        cost = network(data, label, len(word_dict))
        optimizer = fluid.optimizer.Adam(learning_rate=0.001)
        optimizer.minimize(cost)
        if memory_opt:
            fluid.memory_optimize(fluid.default_main_program())

        # Execution.
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
        reader = feeder.decorate_reader(train_reader, multi_devices=True)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        train_cp = compiler.CompiledProgram(fluid.default_main_program())
        train_cp = train_cp.with_data_parallel(loss_name=cost.name)
        fetch_list = [cost.name]

        begin = time.time()
        first_loss, last_loss = None, None
        step_id = 0
        # A subclass may override the iteration count via self.iter.
        custom_iter = getattr(self, "iter", None)
        if custom_iter is not None:
            iter = custom_iter
        for data in reader():
            ret = exe.run(train_cp, feed=data, fetch_list=fetch_list)
            print(ret)
            step_id += 1
            if step_id == 1:
                first_loss = ret[0]
            if step_id == iter:
                last_loss = ret[0]
                break
        end = time.time()

        print("%.4f instances per second" %
              ((batch_size * iter) / (end - begin)))
        print(first_loss, last_loss)
        avg_last_loss_val = np.array(last_loss).mean()
        avg_first_loss_val = np.array(first_loss).mean()
        if math.isnan(float(avg_last_loss_val)) or math.isnan(
                float(avg_first_loss_val)):
            sys.exit("got NaN loss, training failed.")
        return first_loss, last_loss
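
# NOTE: the `use_ir_memory_optimize` and `enable_inplace` arguments above are
# accepted but not yet wired into the compiled program. A minimal sketch of how
# they could be passed through, assuming the Paddle 1.x `fluid.BuildStrategy`
# fields of the same names (not part of this commit):
#
#     build_strategy = fluid.BuildStrategy()
#     build_strategy.memory_optimize = use_ir_memory_optimize
#     build_strategy.enable_inplace = enable_inplace
#     train_cp = train_cp.with_data_parallel(
#         loss_name=cost.name, build_strategy=build_strategy)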


class TestIrMemOptBase(BuildIrMemOptBase):
    def setUp(self):
        self.network = None

    def test_network(self):
        if self.network is None or not core.is_compiled_with_cuda():
            return

        baseline_first_loss, baseline_last_loss = None, None
        for use_cuda in [True]:
            for use_python_mem_opt in [True, False]:
                print('network: {}, use_cuda: {}, use_python_mem_opt: {}, '
                      'use_ir_mem_opt: {}'.format(
                          self.network.__name__, use_cuda,
                          use_python_mem_opt, not use_python_mem_opt))
                with fluid.program_guard(fluid.Program(), fluid.Program()):
                    with fluid.scope_guard(core.Scope()):
                        if use_cuda is True and use_python_mem_opt is True:
                            baseline_first_loss, baseline_last_loss = self.check_network_convergence(
                                self.network,
                                use_cuda=use_cuda,
                                memory_opt=use_python_mem_opt)
                        else:
                            cur_first_loss, cur_last_loss = self.check_network_convergence(
                                self.network,
                                use_cuda=use_cuda,
                                memory_opt=use_python_mem_opt)
                            # The IR-optimized run must match the Python
                            # memory-optimize baseline within 1e-2.
                            self.assertAlmostEqual(
                                np.mean(baseline_last_loss),
                                np.mean(cur_last_loss),
                                delta=1e-2)
                            self.assertAlmostEqual(
                                np.mean(baseline_first_loss),
                                np.mean(cur_first_loss),
                                delta=1e-2)
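
For illustration, a concrete test case only needs to assign a network-building function (taking data, label, and dict_dim and returning an average cost) to self.network; a minimal sketch, where fc_net and TestIrMemOptFC are hypothetical names rather than part of this commit:

    def fc_net(data, label, dict_dim, emb_dim=128, class_dim=2):
        # Bag-of-words classifier over the LoD input.
        emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
        pooled = fluid.layers.sequence_pool(input=emb, pool_type='sum')
        prediction = fluid.layers.fc(input=pooled, size=class_dim, act='softmax')
        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        return fluid.layers.mean(x=cost)

    class TestIrMemOptFC(TestIrMemOptBase):
        def setUp(self):
            self.network = fc_net
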
@@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
        train_reader, multi_devices=use_parallel_executor)
    exe = fluid.Executor(place)
    fluid.default_startup_program().random_seed = 1
    fluid.default_main_program().random_seed = 1
    exe.run(fluid.default_startup_program())
    train_cp = compiler.CompiledProgram(fluid.default_main_program())
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NLP model: a stack of ops operating on LoD tensors. It is a classical test
# case for the memory-optimize pass.
from __future__ import print_function

import unittest

import paddle.fluid as fluid

from ir_memory_optimize_net_base import TestIrMemOptBase

def lstm_net(data,
             label,
             dict_dim,
             emb_dim=128,
             hid_dim=128,
             hid_dim2=96,
             class_dim=2,
             emb_lr=30.0):
    emb = fluid.layers.embedding(
        input=data,
        size=[dict_dim, emb_dim],
        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
    # dynamic_lstm expects its input width (and `size`) to be 4x the hidden
    # width, hence the hid_dim * 4 projection.
    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
    lstm_h, c = fluid.layers.dynamic_lstm(
        input=fc0, size=hid_dim * 4, is_reverse=False)
    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return avg_cost


class TestIrMemOptRNN(TestIrMemOptBase):
    def setUp(self):
        self.network = lstm_net


if __name__ == "__main__":
    unittest.main()
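
Like the other memory-optimize tests, the file above is meant to be run standalone (assuming it is saved as test_ir_memory_optimize_nlp.py, a hypothetical name):

    python test_ir_memory_optimize_nlp.py

TestIrMemOptBase.test_network returns early when self.network is unset or when Paddle was built without CUDA, so the case is a silent no-op on CPU-only builds.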
@@ -28,9 +28,6 @@ os.environ[
from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, transformer, prepare_batch_input
from parallel_executor_test_base import TestParallelExecutorBase

# disable temporarily because of timeout.
sys.exit(0)

# NOTE(dzhwinter): test different strategy collisions.
# enable the eager delete tensor strategy by default.