fixing default communication_data_type for bfloat16_enabled and docs (#3370)

Co-authored-by: Alexander Jipa <azzhipa@amazon.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>

fixing default communication_data_type for bfloat16_enabled and docs (#3370)
Co-authored-by: Alexander Jipa <azzhipa@amazon.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
d56268f3 · Alexander Jipa · GitHub · 39825a90 · d56268f3 · d56268f3
3 changed files
--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -778,6 +778,9 @@ class DeepSpeedEngine(Module):
        if self.fp16_enabled():
            return torch.float16

+        if self.bfloat16_enabled():
+            return torch.bfloat16
+
        return torch.float32

    def postscale_gradients(self):

--- a/docs/_pages/config-json.md
+++ b/docs/_pages/config-json.md
@@ -181,7 +181,7 @@ Example of <i>**scheduler**</i>

 ### Communication options

-<i>**communication_data_type**</i>: [boolean]
+<i>**communication_data_type**</i>: [string]

 | Description                                                                                                                   | Default |
 | ----------------------------------------------------------------------------------------------------------------------------- | ------- |

--- a/tests/unit/runtime/half_precision/test_bf16.py
+++ b/tests/unit/runtime/half_precision/test_bf16.py
@@ -287,7 +287,7 @@ class TestZeroEmptyGrad(DistributedTest):


 @pytest.mark.parametrize("comp_type", [torch.float16, torch.bfloat16, torch.float], ids=["fp16", "bfp16", "fp32"])
-@pytest.mark.parametrize("comm_type", [torch.float16, torch.bfloat16], ids=["fp16", "bfp16"])
+@pytest.mark.parametrize("comm_type", [torch.float16, torch.bfloat16, None], ids=["fp16", "bfp16", "default"])
 class TestZeroDtypeCocktail(DistributedTest):
    world_size = 2

@@ -312,8 +312,11 @@ class TestZeroDtypeCocktail(DistributedTest):
            "zero_optimization": {
                "stage": 2
            },
-            "communication_data_type": type_str[comm_type]
        }
+        if comm_type is not None:
+            config_dict["communication_data_type"] = type_str[comm_type]
+        else:
+            comm_type = comp_type
        hidden_dim = 10

        model = SimpleModel(hidden_dim)