Fixes for torch 1.14 due to new torch.numel return type (#2522)

* fixes for new torch.numel return type
* address comment

Fixes for torch 1.14 due to new torch.numel return type (#2522)
* fixes for new torch.numel return type
* address comment
21105521 · Jeff Rasley · GitHub · 30c8d8a8 · 21105521 · 21105521
Hide whitespace changes
Inline / Side-by-side

Showing 2 changed files with 17 additions and 17 deletions

deepspeed/profiling/flops_profiler/profiler.py deepspeed/profiling/flops_profiler/profiler.py +16 -16

deepspeed/runtime/comm/nccl.py deepspeed/runtime/comm/nccl.py +1 -1

No files found.
--- a/deepspeed/profiling/flops_profiler/profiler.py
+++ b/deepspeed/profiling/flops_profiler/profiler.py
@@ -480,38 +480,38 @@ def _prod(dims):

 def _linear_flops_compute(input, weight, bias=None):
    out_features = weight.shape[0]
-    macs = torch.numel(input) * out_features
+    macs = input.numel() * out_features
    return 2 * macs, macs


 def _relu_flops_compute(input, inplace=False):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _prelu_flops_compute(input: Tensor, weight: Tensor):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _elu_flops_compute(input: Tensor, alpha: float = 1.0, inplace: bool = False):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _leaky_relu_flops_compute(input: Tensor,
                              negative_slope: float = 0.01,
                              inplace: bool = False):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _relu6_flops_compute(input: Tensor, inplace: bool = False):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _silu_flops_compute(input: Tensor, inplace: bool = False):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _gelu_flops_compute(input):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _pool_flops_compute(input,
@@ -523,7 +523,7 @@ def _pool_flops_compute(input,
                        count_include_pad=True,
                        divisor_override=None,
                        return_indices=None):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _conv_flops_compute(input,
@@ -625,8 +625,8 @@ def _batch_norm_flops_compute(
    has_affine = weight is not None
    if training:
        # estimation
-        return torch.numel(input) * (5 if has_affine else 4), 0
-    flops = torch.numel(input) * (2 if has_affine else 1)
+        return input.numel() * (5 if has_affine else 4), 0
+    flops = input.numel() * (2 if has_affine else 1)
    return flops, 0


@@ -639,7 +639,7 @@ def _layer_norm_flops_compute(
 ):
    has_affine = weight is not None
    # estimation
-    return torch.numel(input) * (5 if has_affine else 4), 0
+    return input.numel() * (5 if has_affine else 4), 0


 def _group_norm_flops_compute(input: Tensor,
@@ -649,7 +649,7 @@ def _group_norm_flops_compute(input: Tensor,
                              eps: float = 1e-5):
    has_affine = weight is not None
    # estimation
-    return torch.numel(input) * (5 if has_affine else 4), 0
+    return input.numel() * (5 if has_affine else 4), 0


 def _instance_norm_flops_compute(
@@ -664,7 +664,7 @@ def _instance_norm_flops_compute(
 ):
    has_affine = weight is not None
    # estimation
-    return torch.numel(input) * (5 if has_affine else 4), 0
+    return input.numel() * (5 if has_affine else 4), 0


 def _upsample_flops_compute(input,
@@ -678,7 +678,7 @@ def _upsample_flops_compute(input,
        else:
            return int(size), 0
    assert scale_factor is not None, "either size or scale_factor should be defined"
-    flops = torch.numel(input)
+    flops = input.numel()
    if isinstance(scale_factor, tuple) and len(scale_factor) == len(input):
        flops * int(_prod(scale_factor))
    else:
@@ -687,7 +687,7 @@ def _upsample_flops_compute(input,


 def _softmax_flops_compute(input, dim=None, _stacklevel=3, dtype=None):
-    return torch.numel(input), 0
+    return input.numel(), 0


 def _embedding_flops_compute(

--- a/deepspeed/runtime/comm/nccl.py
+++ b/deepspeed/runtime/comm/nccl.py
@@ -68,7 +68,7 @@ class NcclBackend(object):
            buffer_m = torch.cat([buffer_m, empty_tensor])

        buffer_m.add_(worker_error)
-        worker_scale = torch.norm(buffer_m) / np.sqrt(torch.numel(buffer_m))
+        worker_scale = torch.norm(buffer_m) / np.sqrt(buffer_m.numel())
        worker_error.set_(buffer_m - worker_scale *
                          buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0))