Fix eager test bug (#4678)

* skip test_gpt_data_loader in eager mode * 1_node_fix_egaer_test_bug * remove useless header file * skip tensor and module * skip 2-D sbp in eager mode * fix error * fix bug and remove some skip under eager * fix error * del oneflow_api * rm test_tensor.py * skip test_summary in eager mode * skip test_stateful_local_kernel under cpu only mode * add class AsyncCudaStreamType * fix bug * import os * remove BlobObject::is_python_shutting_down_ * fix error * skip 2d sbp * minor fix * refine comment * make of_format Co-authored-by: N lixinqi <lixinqi0703106@163.com> Co-authored-by: N oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>

Fix eager test bug (#4678)
* skip test_gpt_data_loader in eager mode * 1_node_fix_egaer_test_bug * remove useless header file * skip tensor and module * skip 2-D sbp in eager mode * fix error * fix bug and remove some skip under eager * fix error * del oneflow_api * rm test_tensor.py * skip test_summary in eager mode * skip test_stateful_local_kernel under cpu only mode * add class AsyncCudaStreamType * fix bug * import os * remove BlobObject::is_python_shutting_down_ * fix error * skip 2d sbp * minor fix * refine comment * make of_format Co-authored-by: N lixinqi <lixinqi0703106@163.com> Co-authored-by: N oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
425bd439 · qq_22305325 · GitHub · 422ced27 · 425bd439 · 425bd439
19 changed files
--- a/oneflow/core/job/job_build_and_infer_ctx.cpp
+++ b/oneflow/core/job/job_build_and_infer_ctx.cpp
@@ -1040,7 +1040,7 @@ void JobBuildAndInferCtx::InferBlobBackwardSignature(Operator* op) {
 void JobBuildAndInferCtx::InferBlobBackwardSignature(
    const Operator& op, std::function<bool(const LogicalBlobId&)>* IsLbiBackwardUsed) {
  const bool is_train = job().job_conf().has_train_conf();
-  if (is_train) {
+  if (!is_train) {
    *IsLbiBackwardUsed = [](const LogicalBlobId&) { return false; };
    return;
  }

--- a/oneflow/python/eager/op_executor.py
+++ b/oneflow/python/eager/op_executor.py
@@ -390,7 +390,7 @@ def _EagerRunModelLoad(var_op_conf, snapshot_path):

 def _EagerRunModelSave(var_blobs, snapshot_path):
    path_input_op_conf, path_lbi = _GenModelIOPathInputOpConfAndRetLbi()
-    path_input_blob_objects = {}
+    path_input_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
    (
        BuildModelIOPathInputInstruction,
        BuildFeedPathInstruction,

--- a/oneflow/python/test/ops/test_dim_gather_dynamic.py
+++ b/oneflow/python/test/ops/test_dim_gather_dynamic.py
@@ -41,7 +41,13 @@ def gen_gather_test_sample(input_shape, index_shape, dim, is_float=True):
    output = np.take_along_axis(input, index, dim)
    grad = _np_dim_scatter_add(np.ones_like(output), dim, index, input_shape)

-    ret = {"input": input, "index": index, "dim": dim, "output": output, "grad": grad}
+    ret = {
+        "input": input.astype(np.float32),
+        "index": index.astype(np.int32),
+        "dim": dim,
+        "output": output.astype(np.float32),
+        "grad": grad.astype(np.float32),
+    }
    return ret


@@ -94,9 +100,6 @@ def _compare_dim_gather_with_samples(test_case, inputshape, indexshape, dim, max
 class TestDynamicDimGather(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_dynamic_dim_gather(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        _compare_dim_gather_with_samples(
            test_case, inputshape=(2, 2), indexshape=(2, 2), dim=1, maxshape=(10, 10)
        )

--- a/oneflow/python/test/ops/test_fused_bias_add_dropout.py
+++ b/oneflow/python/test/ops/test_fused_bias_add_dropout.py
@@ -179,9 +179,6 @@ def compare_with_not_fused(
 class TestFusedBiasAdd(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_fused_bias_add(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu"]
        arg_dict["x_shape"] = [

--- a/oneflow/python/test/ops/test_fused_bias_add_gelu.py
+++ b/oneflow/python/test/ops/test_fused_bias_add_gelu.py
@@ -133,9 +133,6 @@ def compare_with_not_fused(test_case, device_type, x_shape, data_type, data_form
 class TestFusedBiasAdd(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_fused_bias_add(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu"]
        arg_dict["x_shape"] = [

--- a/oneflow/python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py
+++ b/oneflow/python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py
@@ -130,9 +130,6 @@ def compare_with_not_fused(
 class TestFusedScaleTrilSoftmaxDropout(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_fused_scale_tril_softmax_dropout(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu"]
        arg_dict["x_shape"] = [

--- a/oneflow/python/test/ops/test_gather_nd.py
+++ b/oneflow/python/test/ops/test_gather_nd.py
@@ -79,7 +79,10 @@ def _make_gather_nd_fn(

    def do_gather_nd(x, index):
        x_var = flow.get_variable(
-            "params", shape=(1,), dtype=x_dtype, initializer=flow.zeros_initializer(),
+            "params",
+            shape=(1,),
+            dtype=x_dtype,
+            initializer=flow.constant_initializer(0, x_dtype),
        )
        x = x + flow.cast_to_current_logical_view(x_var)
        y = flow.gather_nd(x, index)

--- a/oneflow/python/test/ops/test_gpt_data_loader.py
+++ b/oneflow/python/test/ops/test_gpt_data_loader.py
@@ -96,6 +96,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
    RANDOM_SEED = 12345

    @flow.unittest.skip_unless_1n1d()
+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "2-D SBP doesn't work in eager mode",
+    )
    def test_simple(self):
        of_gpt_data_loader_fn = _make_gpt_data_loader_func(
            data_file_prefix=self.DATA_FILE_PREFIX,
@@ -117,6 +121,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
        )
        self.assertTrue(np.array_equal(tokens, cmp_tokens))

+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "2-D SBP doesn't work in eager mode",
+    )
    def test_1n1d(self):
        of_gpt_data_loader_fn = _make_gpt_data_loader_func(
            data_file_prefix=self.DATA_FILE_PREFIX,
@@ -137,6 +145,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
        return np.stack(tokens_list, axis=0)

    @flow.unittest.skip_unless_1n4d()
+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "2-D SBP doesn't work in eager mode",
+    )
    def test_1n4d(self):
        of_gpt_data_loader_fn = _make_gpt_data_loader_func(
            data_file_prefix=self.DATA_FILE_PREFIX,
@@ -163,6 +175,10 @@ class TestGPTDataLoader(flow.unittest.TestCase):
        return result_1n4d

    @flow.unittest.skip_unless_2n4d()
+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "2-D SBP doesn't work in eager mode",
+    )
    def test_2n4d(self):
        of_gpt_data_loader_fn = _make_gpt_data_loader_func(
            data_file_prefix=self.DATA_FILE_PREFIX,

--- a/oneflow/python/test/ops/test_hierarchical_parallel_cast.py
+++ b/oneflow/python/test/ops/test_hierarchical_parallel_cast.py
@@ -69,6 +69,9 @@ def _test(test_case, device_num):


 @flow.unittest.skip_unless_1n2d()
+@unittest.skipIf(
+    flow.unittest.env.eager_execution_enabled(), "2-D SBP doesn't work in eager mode",
+)
 class TestParallelCast(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_on_gpu(test_case):
@@ -350,6 +353,9 @@ def _test_reshape_like(test_case):


 @flow.unittest.skip_unless_1n4d()
+@unittest.skipIf(
+    flow.unittest.env.eager_execution_enabled(), "2-D SBP doesn't work in eager mode",
+)
 class TestHierarchicalParallelCast(flow.unittest.TestCase):
    def test_change_axis1(test_case):
        arg_dict = OrderedDict()

--- a/oneflow/python/test/ops/test_interface_op_read_and_write.py
+++ b/oneflow/python/test/ops/test_interface_op_read_and_write.py
@@ -28,10 +28,6 @@ class TestInterfaceOpReadAndWrite(flow.unittest.TestCase):
    def test(test_case):
        flow.config.gpu_device_num(2)

-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
-
        @flow.global_function()
        def add() -> tp.Numpy:
            with flow.scope.placement("gpu", "0:0-1"):
@@ -50,6 +46,9 @@ class TestInterfaceOpReadAndWrite(flow.unittest.TestCase):
        # NOTE(chengcheng): Should retain for session init before set_interface_blob_value
        flow.train.CheckPoint().init()

+        if flow.eager_execution_enabled():
+            add()
+
        x_value = np.random.random((2, 3)).astype(np.float32)
        y_value = np.random.random((2, 3)).astype(np.float32)
        flow.experimental.set_interface_blob_value("x", x_value)

--- a/oneflow/python/test/ops/test_module_container.py
+++ b/oneflow/python/test/ops/test_module_container.py
@@ -16,7 +16,7 @@ limitations under the License.
 import unittest
 from typing import Tuple

-import oneflow as flow
+import oneflow.experimental as flow
 import oneflow.typing as tp



--- a/oneflow/python/test/ops/test_optimizers.py
+++ b/oneflow/python/test/ops/test_optimizers.py
@@ -1040,6 +1040,10 @@ class TestOptimizers(flow.unittest.TestCase):
        for arg in GenArgList(arg_dict):
            compare_with_numpy_indexed_slices_sgd(*arg)

+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "indexed slices sgdw doesn't work in eager mode",
+    )
    def test_indexed_slices_sgdw(test_case):
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
@@ -1069,6 +1073,10 @@ class TestOptimizers(flow.unittest.TestCase):
        for arg in GenArgList(arg_dict):
            compare_with_numpy_indexed_slices_adam(*arg)

+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(),
+        "indexed slices adamw doesn't work in eager mode",
+    )
    def test_indexed_slices_adamw(test_case):
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]

--- a/oneflow/python/test/ops/test_parallel_cast.py
+++ b/oneflow/python/test/ops/test_parallel_cast.py
@@ -71,6 +71,10 @@ def _test(test_case, device_num):


 @flow.unittest.skip_unless_1n2d()
+@unittest.skipIf(
+    flow.unittest.env.eager_execution_enabled(),
+    "Parallel cast SBP doesn't work in eager mode",
+)
 class TestParallelCast(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_on_gpu(test_case):

--- a/oneflow/python/test/ops/test_sigmoid_cross_entropy.py
+++ b/oneflow/python/test/ops/test_sigmoid_cross_entropy.py
@@ -69,8 +69,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
    )

    # OneFlow
-    check_point = flow.train.CheckPoint()
-    check_point.init()
    of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()

    # TensorFlow
@@ -95,9 +93,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
 @flow.unittest.skip_unless_1n1d()
 class TestSigmoidCrossEntropy(flow.unittest.TestCase):
    def test_sigmoid_cross_entropy_with_logits(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["data_type"] = ["double", "float32"]

--- a/oneflow/python/test/ops/test_softmax.py
+++ b/oneflow/python/test/ops/test_softmax.py
@@ -97,9 +97,6 @@ def compare_with_tensorflow(device_type, x_shape, data_type, axis):
 class TestSoftmax(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
    def test_softmax_shape(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["x_shape"] = [
@@ -125,9 +122,6 @@ class TestSoftmax(flow.unittest.TestCase):
            compare_with_tensorflow(*arg)

    def test_softmax_axis(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["x_shape"] = [(10, 20, 30, 40)]

--- a/oneflow/python/test/ops/test_softmax_cross_entropy.py
+++ b/oneflow/python/test/ops/test_softmax_cross_entropy.py
@@ -123,9 +123,6 @@ def compare_with_tensorflow(device_type, data_type, shape):
 @flow.unittest.skip_unless_1n1d()
 class TestSoftmaxCrossEntropy(flow.unittest.TestCase):
    def test_softmax_cross_entropy_with_logits(test_case):
-        if flow.eager_execution_enabled():
-            print("\nSkip under erger mode!")
-            return
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["data_type"] = ["double", "float32", "float16"]

--- a/oneflow/python/test/ops/test_stateful_local_kernel.py
+++ b/oneflow/python/test/ops/test_stateful_local_kernel.py
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 import unittest
-
+import os
 import oneflow as flow


@@ -22,6 +22,7 @@ import oneflow as flow
    not flow.unittest.env.eager_execution_enabled(),
    ".numpy() doesn't work in lazy mode",
 )
+@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
 class TestStatefulLocalKernel(flow.unittest.TestCase):
    def test_dynamic_attrs(test_case):
        x = (

--- a/oneflow/python/test/ops/test_summary.py
+++ b/oneflow/python/test/ops/test_summary.py
@@ -181,6 +181,7 @@ def summary_demo():
 @flow.unittest.skip_unless_1n1d()
 class TestSummary(flow.unittest.TestCase):
    @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
+    @unittest.skipIf(os.getenv("ONEFLOW_TEST_ENABLE_EAGER"), "only test lazy cases")
    def test_summary(test_case):
        summary_demo()


--- a/oneflow/python/test/ops/test_watch.py
+++ b/oneflow/python/test/ops/test_watch.py
@@ -34,6 +34,9 @@ class TestWatch(flow.unittest.TestCase):

        ReluJob(data)

+    @unittest.skipIf(
+        flow.unittest.env.eager_execution_enabled(), "Doesn't work in eager mode",
+    )
    def test_two_device(test_case):
        flow.config.gpu_device_num(2)
        data = np.ones((10,), dtype=np.float32)