Unverified · Commit 0b032fae authored by Chen Weihang, committed by GitHub

Polish unittests details and execution conditions to adapt to MUSL (#29044)

* fix failed tests in the list given by yingchun

* add unittests into static_mode_white_list

* add enable static

* fix dist unittest

* skip test_sigmoid_focal_loss_op & add gym

* revert unittest skips that are no longer needed

* remove gym
Parent 92817f80
......@@ -240,7 +240,8 @@ elseif(WITH_PSLIB)
lod_rank_table fs shell fleet_wrapper heter_wrapper box_wrapper lodtensor_printer feed_fetch_method
graph_to_program_pass variable_helper timer monitor pslib_brpc )
# TODO: Fix these unittests, which fail on Windows
if(NOT WIN32)
# This unittest always fails, and currently no CI runs it
if(NOT WITH_MUSL AND NOT WIN32)
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif()
else()
......@@ -254,7 +255,8 @@ else()
lod_rank_table fs shell fleet_wrapper heter_wrapper box_wrapper lodtensor_printer feed_fetch_method
graph_to_program_pass variable_helper timer monitor)
# TODO: Fix these unittests, which fail on Windows
if(NOT WIN32)
# This unittest always fails, and currently no CI runs it
if(NOT WITH_MUSL AND NOT WIN32)
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif()
endif()
......
......@@ -82,34 +82,58 @@ if(NOT WITH_GPU OR WIN32)
endif()
if(WIN32)
LIST(REMOVE_ITEM TEST_OPS test_boxps)
LIST(REMOVE_ITEM TEST_OPS test_trainer_desc)
LIST(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception)
LIST(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization)
LIST(REMOVE_ITEM TEST_OPS test_checkpoint_notify_op)
endif()
LIST(REMOVE_ITEM TEST_OPS test_distributed_strategy)
if(WIN32)
LIST(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception)
LIST(REMOVE_ITEM TEST_OPS test_trainer_desc)
LIST(REMOVE_ITEM TEST_OPS test_checkpoint_notify_op)
LIST(REMOVE_ITEM TEST_OPS test_downpoursgd)
LIST(REMOVE_ITEM TEST_OPS test_fleet)
LIST(REMOVE_ITEM TEST_OPS test_fleet_metric)
LIST(REMOVE_ITEM TEST_OPS test_fleet_nocvm_1)
LIST(REMOVE_ITEM TEST_OPS test_fleet_ps)
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker)
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2)
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3)
LIST(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor)
LIST(REMOVE_ITEM TEST_OPS test_fleet_utils)
LIST(REMOVE_ITEM TEST_OPS test_lookup_sparse_table_split_op)
LIST(REMOVE_ITEM TEST_OPS test_ps_dispatcher)
# TODO: Fix these unittests, which fail on Windows
LIST(REMOVE_ITEM TEST_OPS test_debugger)
endif()
if(NOT WITH_DISTRIBUTE OR WIN32)
# DISTRIBUTE related
LIST(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization)
LIST(REMOVE_ITEM TEST_OPS test_distributed_strategy)
LIST(REMOVE_ITEM TEST_OPS test_fleet_metric)
LIST(REMOVE_ITEM TEST_OPS test_fleet_ps)
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2)
LIST(REMOVE_ITEM TEST_OPS test_fleet_utils)
LIST(REMOVE_ITEM TEST_OPS test_lookup_sparse_table_split_op)
# TODO: Fix these unittests, which fail on Windows
list(REMOVE_ITEM TEST_OPS test_fake_init_op)
list(REMOVE_ITEM TEST_OPS test_merge_ids_op)
list(REMOVE_ITEM TEST_OPS test_split_ids_op)
LIST(REMOVE_ITEM TEST_OPS test_ref_by_trainer_id_op)
endif()
if(NOT WITH_DISTRIBUTE)
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new)
LIST(REMOVE_ITEM TEST_OPS test_desc_clone_dist)
LIST(REMOVE_ITEM TEST_OPS test_program_code_dist)
endif()
if(WITH_MUSL)
# TODO: In the musl docker environment provided by SEC,
# the numerical accuracy of the test cases in this unittest
# cannot meet the required tolerance; the error looks like:
#   AssertionError:
#   2.3044646853182973e-07 not less than or equal to 1e-07
# SEC needs to follow up on this issue, which needs to be
# resolved before this CI is made required
LIST(REMOVE_ITEM TEST_OPS test_sigmoid_focal_loss_op)
endif()
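For context, the removal above works at the build level: under musl the test is simply never added to the suite. A runtime alternative, shown only as a hedged sketch here (this commit does not do this, and running_on_musl is a hypothetical helper built on platform.libc_ver()), would look like:

# Hypothetical sketch, not part of this commit: skip a tolerance-sensitive
# test at runtime when the interpreter is running against musl libc.
import platform
import unittest


def running_on_musl():
    # Heuristic only: CPython reports ('glibc', <version>) on glibc systems
    # and an empty libc name on musl, so this cannot distinguish musl from
    # other non-glibc libcs.
    libc, _ = platform.libc_ver()
    return platform.system() == "Linux" and libc != "glibc"


class SigmoidFocalLossTolerance(unittest.TestCase):
    @unittest.skipIf(running_on_musl(),
                     "accuracy exceeds the 1e-07 tolerance on musl (~2.3e-07)")
    def test_tolerance(self):
        # Stand-in assertion mirroring the failure message quoted above;
        # on musl the observed difference was ~2.3e-07, which fails it.
        max_diff = 5e-08
        self.assertLessEqual(max_diff, 1e-07)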
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1)
......@@ -185,8 +209,12 @@ if(NOT WITH_MKL OR NOT WITH_AVX)
list(REMOVE_ITEM TEST_OPS test_match_matrix_tensor_op)
list(REMOVE_ITEM TEST_OPS test_var_conv_2d)
endif()
if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON)
list(REMOVE_ITEM TEST_OPS test_pyramid_hash_op)
endif()
if(NOT WITH_DISTRIBUTE OR WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON)
list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash)
endif()
......@@ -561,7 +589,7 @@ if(NOT WIN32)
set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450)
endif()
if(NOT APPLE AND NOT WIN32)
if(WITH_DISTRIBUTE AND NOT APPLE AND NOT WIN32)
bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
......@@ -631,6 +659,9 @@ if (NOT WIN32)
set_tests_properties(test_multiprocess_reader_exception PROPERTIES TIMEOUT 120)
set_tests_properties(test_layers PROPERTIES TIMEOUT 120)
set_tests_properties(test_ir_memory_optimize_transformer PROPERTIES TIMEOUT 120)
endif()
if (WITH_DISTRIBUTE AND NOT WIN32)
set_tests_properties(test_fleet_utils PROPERTIES TIMEOUT 120)
endif()
......
......@@ -100,16 +100,6 @@ def get_model(batch_size):
    return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict


def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers):
    t = fluid.DistributeTranspiler()
    t.transpile(
        trainer_id=trainer_id,
        program=main_program,
        pservers=pserver_endpoints,
        trainers=trainers)
    return t


def operator_equal(a, b):
    if a.__str__() != b.__str__():
        raise ValueError("In operator_equal not equal\n")
......@@ -178,27 +168,6 @@ def program_equal(a, b):
    return True


class TestDistMnist(unittest.TestCase):
    @unittest.skipIf(sys.platform == "win32",
                     "Windows does not support distribution")
    def test_desc_clone(self):
        get_model(batch_size=20)

        pserver_endpoints = "127.0.0.1:9123"
        trainers = 1
        current_endpoint = "127.0.0.1:9123"
        t = get_transpiler(0,
                           fluid.default_main_program(), pserver_endpoints,
                           trainers)

        pserver_prog = t.get_pserver_program(current_endpoint)
        startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
        main = pserver_prog.clone()
        startup = startup_prog.clone()

        self.assertTrue(program_equal(main, pserver_prog))
        self.assertTrue(program_equal(startup, startup_prog))


class TestCloneWithStopGradient(unittest.TestCase):
    def test_clone_with_stop_gradient(self):
        train_program = fluid.Program()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest

import paddle
import paddle.fluid as fluid

from test_desc_clone import get_model, program_equal


def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers):
    t = fluid.DistributeTranspiler()
    t.transpile(
        trainer_id=trainer_id,
        program=main_program,
        pservers=pserver_endpoints,
        trainers=trainers)
    return t


class TestDistMnist(unittest.TestCase):
    def test_desc_clone(self):
        paddle.enable_static()

        get_model(batch_size=20)

        pserver_endpoints = "127.0.0.1:9123"
        trainers = 1
        current_endpoint = "127.0.0.1:9123"
        t = get_transpiler(0,
                           fluid.default_main_program(), pserver_endpoints,
                           trainers)

        pserver_prog = t.get_pserver_program(current_endpoint)
        startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
        main = pserver_prog.clone()
        startup = startup_prog.clone()

        self.assertTrue(program_equal(main, pserver_prog))
        self.assertTrue(program_equal(startup, startup_prog))
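The only functional change in this dist variant, besides dropping the Windows skip, is the paddle.enable_static() call: Paddle 2.0 makes dynamic-graph (imperative) mode the default, so tests that build static fluid.Program graphs must switch modes first. A minimal sketch of the pattern (the network below is illustrative, not from the commit):

# Minimal sketch: static-graph APIs such as fluid.Program and
# fluid.program_guard assume static mode, which is no longer the default.
import paddle
import paddle.fluid as fluid

paddle.enable_static()  # leave the default dynamic (imperative) mode

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.data(name="x", shape=[None, 32], dtype="float32")
    y = fluid.layers.fc(input=x, size=10)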
......@@ -83,7 +83,10 @@ class TestTensorDataset(unittest.TestCase):
            assert np.allclose(label.numpy(), label_np[i])

    def test_main(self):
        for p in [fluid.CPUPlace(), fluid.CUDAPlace(0)]:
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.run_main(num_workers=0, places=p)
......@@ -132,7 +135,10 @@ class TestChainDataset(unittest.TestCase):
                idx += 1

    def test_main(self):
        for p in [fluid.CPUPlace(), fluid.CUDAPlace(0)]:
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.run_main(num_workers=0, places=p)
......
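Both hunks above follow the same pattern: instead of unconditionally iterating over fluid.CUDAPlace(0), which raises on CPU-only builds such as the musl one, the GPU place is appended only when the build reports CUDA support. The pattern in isolation, as a minimal sketch (the Executor line is illustrative):

# Minimal sketch of the place-guarding pattern: only construct CUDAPlace(0)
# when this Paddle build was compiled with CUDA support.
import paddle.fluid as fluid

places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
    places.append(fluid.CUDAPlace(0))

for place in places:
    exe = fluid.Executor(place)  # run the same test body on each available place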
......@@ -12,71 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import unittest
import sys
from multiprocessing import Process
import signal
from __future__ import print_function
import numpy
import unittest
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layers.io import ListenAndServ
from paddle.fluid.layers.io import Recv
from paddle.fluid.layers.io import Send
import paddle.fluid.layers.ops as ops
class TestProgram2Code(unittest.TestCase):
    @unittest.skipIf(sys.platform == "win32",
                     "Windows does not support distribution")
    def test_print(self):
        place = fluid.CPUPlace()
        self.init_serv(place)
        self.init_client(place, 9123)

    def init_serv(self, place):
        main = fluid.Program()

        with fluid.program_guard(main):
            serv = ListenAndServ("127.0.0.1:0", ["X"], optimizer_mode=False)
            with serv.do():
                out_var = main.global_block().create_var(
                    name="scale_0.tmp_0",
                    persistable=True,
                    dtype="float32",
                    shape=[32, 32])
                x = layers.data(
                    shape=[32, 32],
                    dtype='float32',
                    name="X",
                    append_batch_size=False)
                fluid.initializer.Constant(value=1.0)(x, main.global_block())
                ops._scale(x=x, scale=10.0, out=out_var)

        print(main)

    def init_client(self, place, port):
        main = fluid.Program()

        with fluid.program_guard(main):
            x = layers.data(
                shape=[32, 32],
                dtype='float32',
                name='X',
                append_batch_size=False)
            fluid.initializer.Constant(value=2.3)(x, main.global_block())

            get_var = main.global_block().create_var(
                name="scale_0.tmp_0",  # server side var
                dtype="float32",
                persistable=False,
                shape=[32, 32])
            fluid.initializer.Constant(value=2.3)(get_var, main.global_block())

            Send("127.0.0.1:%d" % port, [x])
            o = Recv("127.0.0.1:%d" % port, [get_var])

        print(main)

class TestProgramToReadableCode(unittest.TestCase):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import sys
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layers.io import ListenAndServ
from paddle.fluid.layers.io import Recv
from paddle.fluid.layers.io import Send
import paddle.fluid.layers.ops as ops
class TestProgram2Code(unittest.TestCase):
    @unittest.skipIf(sys.platform == "win32",
                     "Windows does not support distribution")
    def test_print(self):
        paddle.enable_static()
        place = fluid.CPUPlace()
        self.init_serv(place)
        self.init_client(place, 9123)

    def init_serv(self, place):
        main = fluid.Program()

        with fluid.program_guard(main):
            serv = ListenAndServ("127.0.0.1:0", ["X"], optimizer_mode=False)
            with serv.do():
                out_var = main.global_block().create_var(
                    name="scale_0.tmp_0",
                    persistable=True,
                    dtype="float32",
                    shape=[32, 32])
                x = layers.data(
                    shape=[32, 32],
                    dtype='float32',
                    name="X",
                    append_batch_size=False)
                fluid.initializer.Constant(value=1.0)(x, main.global_block())
                ops._scale(x=x, scale=10.0, out=out_var)

        print(main)

    def init_client(self, place, port):
        main = fluid.Program()

        with fluid.program_guard(main):
            x = layers.data(
                shape=[32, 32],
                dtype='float32',
                name='X',
                append_batch_size=False)
            fluid.initializer.Constant(value=2.3)(x, main.global_block())

            get_var = main.global_block().create_var(
                name="scale_0.tmp_0",  # server side var
                dtype="float32",
                persistable=False,
                shape=[32, 32])
            fluid.initializer.Constant(value=2.3)(get_var, main.global_block())

            Send("127.0.0.1:%d" % port, [x])
            o = Recv("127.0.0.1:%d" % port, [get_var])

        print(main)
if __name__ == "__main__":
    unittest.main()
......@@ -94,7 +94,7 @@ class TestTranslatedLayer(unittest.TestCase):
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=True,
            num_workers=2)
            num_workers=0)

        # train
        train(self.layer, self.loader, self.loss_fn, self.sgd)
......
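Setting num_workers=0 makes the DataLoader read batches synchronously in the main process rather than forking worker subprocesses, which avoids multiprocessing fragility in the minimal musl environment. A self-contained sketch of the resulting configuration (the dataset below is illustrative, not from the test):

# Minimal sketch: num_workers=0 keeps data loading in the main process,
# avoiding forked worker subprocesses that can be fragile under musl.
import numpy as np
import paddle
from paddle.io import DataLoader, Dataset


class RandomDataset(Dataset):
    def __len__(self):
        return 16

    def __getitem__(self, idx):
        return np.random.rand(4).astype("float32")


loader = DataLoader(RandomDataset(),
                    batch_size=4,
                    shuffle=True,
                    drop_last=True,
                    num_workers=0)  # synchronous, single-process loading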