未验证 提交 425bd439 编写于 作者: qq_22305325's avatar qq_22305325 提交者: GitHub

Fix eager test bug (#4678)

* skip test_gpt_data_loader in eager mode

* 1_node_fix_eager_test_bug

* remove useless header file

* skip tensor and module

* skip 2-D sbp in eager mode

* fix error

* fix bug and remove some skip under eager

* fix error

* del oneflow_api

* rm test_tensor.py

* skip test_summary in eager mode

* skip test_stateful_local_kernel under cpu only mode

* add class AsyncCudaStreamType

* fix bug

* import os

* remove BlobObject::is_python_shutting_down_

* fix error

* skip 2d sbp

* minor fix

* refine comment

* make of_format
Co-authored-by: lixinqi <lixinqi0703106@163.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
上级 422ced27
......@@ -1040,7 +1040,7 @@ void JobBuildAndInferCtx::InferBlobBackwardSignature(Operator* op) {
void JobBuildAndInferCtx::InferBlobBackwardSignature(
const Operator& op, std::function<bool(const LogicalBlobId&)>* IsLbiBackwardUsed) {
const bool is_train = job().job_conf().has_train_conf();
if (is_train) {
if (!is_train) {
*IsLbiBackwardUsed = [](const LogicalBlobId&) { return false; };
return;
}
......
......@@ -390,7 +390,7 @@ def _EagerRunModelLoad(var_op_conf, snapshot_path):
def _EagerRunModelSave(var_blobs, snapshot_path):
path_input_op_conf, path_lbi = _GenModelIOPathInputOpConfAndRetLbi()
path_input_blob_objects = {}
path_input_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
(
BuildModelIOPathInputInstruction,
BuildFeedPathInstruction,
......
......@@ -41,7 +41,13 @@ def gen_gather_test_sample(input_shape, index_shape, dim, is_float=True):
output = np.take_along_axis(input, index, dim)
grad = _np_dim_scatter_add(np.ones_like(output), dim, index, input_shape)
ret = {"input": input, "index": index, "dim": dim, "output": output, "grad": grad}
ret = {
"input": input.astype(np.float32),
"index": index.astype(np.int32),
"dim": dim,
"output": output.astype(np.float32),
"grad": grad.astype(np.float32),
}
return ret
......@@ -94,9 +100,6 @@ def _compare_dim_gather_with_samples(test_case, inputshape, indexshape, dim, max
class TestDynamicDimGather(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_dynamic_dim_gather(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
_compare_dim_gather_with_samples(
test_case, inputshape=(2, 2), indexshape=(2, 2), dim=1, maxshape=(10, 10)
)
......
......@@ -179,9 +179,6 @@ def compare_with_not_fused(
class TestFusedBiasAdd(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_fused_bias_add(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu"]
arg_dict["x_shape"] = [
......
......@@ -133,9 +133,6 @@ def compare_with_not_fused(test_case, device_type, x_shape, data_type, data_form
class TestFusedBiasAdd(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_fused_bias_add(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu"]
arg_dict["x_shape"] = [
......
......@@ -130,9 +130,6 @@ def compare_with_not_fused(
class TestFusedScaleTrilSoftmaxDropout(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_fused_scale_tril_softmax_dropout(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu"]
arg_dict["x_shape"] = [
......
......@@ -79,7 +79,10 @@ def _make_gather_nd_fn(
def do_gather_nd(x, index):
x_var = flow.get_variable(
"params", shape=(1,), dtype=x_dtype, initializer=flow.zeros_initializer(),
"params",
shape=(1,),
dtype=x_dtype,
initializer=flow.constant_initializer(0, x_dtype),
)
x = x + flow.cast_to_current_logical_view(x_var)
y = flow.gather_nd(x, index)
......
......@@ -96,6 +96,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
RANDOM_SEED = 12345
@flow.unittest.skip_unless_1n1d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"2-D SBP doesn't work in eager mode",
)
def test_simple(self):
of_gpt_data_loader_fn = _make_gpt_data_loader_func(
data_file_prefix=self.DATA_FILE_PREFIX,
......@@ -117,6 +121,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
)
self.assertTrue(np.array_equal(tokens, cmp_tokens))
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"2-D SBP doesn't work in eager mode",
)
def test_1n1d(self):
of_gpt_data_loader_fn = _make_gpt_data_loader_func(
data_file_prefix=self.DATA_FILE_PREFIX,
......@@ -137,6 +145,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
return np.stack(tokens_list, axis=0)
@flow.unittest.skip_unless_1n4d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"2-D SBP doesn't work in eager mode",
)
def test_1n4d(self):
of_gpt_data_loader_fn = _make_gpt_data_loader_func(
data_file_prefix=self.DATA_FILE_PREFIX,
......@@ -163,6 +175,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
return result_1n4d
@flow.unittest.skip_unless_2n4d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"2-D SBP doesn't work in eager mode",
)
def test_2n4d(self):
of_gpt_data_loader_fn = _make_gpt_data_loader_func(
data_file_prefix=self.DATA_FILE_PREFIX,
......
......@@ -69,6 +69,9 @@ def _test(test_case, device_num):
@flow.unittest.skip_unless_1n2d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(), "2-D SBP doesn't work in eager mode",
)
class TestParallelCast(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_on_gpu(test_case):
......@@ -350,6 +353,9 @@ def _test_reshape_like(test_case):
@flow.unittest.skip_unless_1n4d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(), "2-D SBP doesn't work in eager mode",
)
class TestHierarchicalParallelCast(flow.unittest.TestCase):
def test_change_axis1(test_case):
arg_dict = OrderedDict()
......
......@@ -28,10 +28,6 @@ class TestInterfaceOpReadAndWrite(flow.unittest.TestCase):
def test(test_case):
flow.config.gpu_device_num(2)
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
@flow.global_function()
def add() -> tp.Numpy:
with flow.scope.placement("gpu", "0:0-1"):
......@@ -50,6 +46,9 @@ class TestInterfaceOpReadAndWrite(flow.unittest.TestCase):
# NOTE(chengcheng): Should retain for session init before set_interface_blob_value
flow.train.CheckPoint().init()
if flow.eager_execution_enabled():
add()
x_value = np.random.random((2, 3)).astype(np.float32)
y_value = np.random.random((2, 3)).astype(np.float32)
flow.experimental.set_interface_blob_value("x", x_value)
......
......@@ -16,7 +16,7 @@ limitations under the License.
import unittest
from typing import Tuple
import oneflow as flow
import oneflow.experimental as flow
import oneflow.typing as tp
......
......@@ -1040,6 +1040,10 @@ class TestOptimizers(flow.unittest.TestCase):
for arg in GenArgList(arg_dict):
compare_with_numpy_indexed_slices_sgd(*arg)
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"indexed slices sgdw doesn't work in eager mode",
)
def test_indexed_slices_sgdw(test_case):
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
......@@ -1069,6 +1073,10 @@ class TestOptimizers(flow.unittest.TestCase):
for arg in GenArgList(arg_dict):
compare_with_numpy_indexed_slices_adam(*arg)
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"indexed slices adamw doesn't work in eager mode",
)
def test_indexed_slices_adamw(test_case):
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
......
......@@ -71,6 +71,10 @@ def _test(test_case, device_num):
@flow.unittest.skip_unless_1n2d()
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(),
"Parallel cast SBP doesn't work in eager mode",
)
class TestParallelCast(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_on_gpu(test_case):
......
......@@ -69,8 +69,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
)
# OneFlow
check_point = flow.train.CheckPoint()
check_point.init()
of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()
# TensorFlow
......@@ -95,9 +93,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
@flow.unittest.skip_unless_1n1d()
class TestSigmoidCrossEntropy(flow.unittest.TestCase):
def test_sigmoid_cross_entropy_with_logits(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["data_type"] = ["double", "float32"]
......
......@@ -97,9 +97,6 @@ def compare_with_tensorflow(device_type, x_shape, data_type, axis):
class TestSoftmax(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_softmax_shape(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["x_shape"] = [
......@@ -125,9 +122,6 @@ class TestSoftmax(flow.unittest.TestCase):
compare_with_tensorflow(*arg)
def test_softmax_axis(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["x_shape"] = [(10, 20, 30, 40)]
......
......@@ -123,9 +123,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
@flow.unittest.skip_unless_1n1d()
class TestSoftmaxCrossEntropy(flow.unittest.TestCase):
def test_softmax_cross_entropy_with_logits(test_case):
if flow.eager_execution_enabled():
print("\nSkip under erger mode!")
return
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["data_type"] = ["double", "float32", "float16"]
......
......@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
import os
import oneflow as flow
......@@ -22,6 +22,7 @@ import oneflow as flow
not flow.unittest.env.eager_execution_enabled(),
".numpy() doesn't work in lazy mode",
)
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
class TestStatefulLocalKernel(flow.unittest.TestCase):
def test_dynamic_attrs(test_case):
x = (
......
......@@ -181,6 +181,7 @@ def summary_demo():
@flow.unittest.skip_unless_1n1d()
class TestSummary(flow.unittest.TestCase):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
@unittest.skipIf(os.getenv("ONEFLOW_TEST_ENABLE_EAGER"), "only test lazy cases")
def test_summary(test_case):
summary_demo()
......
......@@ -34,6 +34,9 @@ class TestWatch(flow.unittest.TestCase):
ReluJob(data)
@unittest.skipIf(
flow.unittest.env.eager_execution_enabled(), "Doesn't work in eager mode",
)
def test_two_device(test_case):
flow.config.gpu_device_num(2)
data = np.ones((10,), dtype=np.float32)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册