Unverified commit 5102c4ab authored by Molly Smith, committed by GitHub

fused bias relu unittest (#2297)

Parent c8641032
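
For context, the fused kernel under test computes relu(activations + bias), with the add performed in float32 and the result cast back to the input dtype. A minimal pure-PyTorch sketch of that contract (illustration only; shapes and values are arbitrary):

import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 8, dtype=torch.float16)
b = torch.randn(8, dtype=torch.float16)
# Upcast, add bias, apply ReLU, cast back: the contract mirrored by
# run_bias_relu_reference in the test below.
fused_semantics = F.relu(x.float() + b.float()).half()
# An end-to-end fp16 computation may differ in the last bits, which is
# why the test compares with dtype-aware tolerances rather than equality.
naive_fp16 = F.relu(x + b)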
"""
Copyright 2022 The Microsoft DeepSpeed Team
"""
import pytest
import torch
import deepspeed
from deepspeed.ops.op_builder import InferenceBuilder

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
    pytest.skip("Inference ops are not available on this system",
                allow_module_level=True)
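
# Handle to the compiled inference kernels; populated lazily on first use
# inside run_bias_relu_ds.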
inference_module = None
torch_minor_version = None
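

# Comparison helper with dtype-aware tolerances: fp16 carries far less
# precision than fp32, so its thresholds are correspondingly looser.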
def allclose(x, y):
    assert x.dtype == y.dtype
    rtol, atol = {torch.float32: (5e-4, 5e-5), torch.float16: (3e-2, 2e-3)}[x.dtype]
    return torch.allclose(x, y, rtol=rtol, atol=atol)


def run_bias_relu_reference(activations, bias):
    # Expected behavior is that of casting to float32 internally
    return torch.nn.functional.relu(
        activations.to(torch.float32) + bias.to(torch.float32)).to(activations.dtype)


def run_bias_relu_ds(activations, bias):
    global inference_module
    if inference_module is None:
        inference_module = InferenceBuilder().load()
    if activations.dtype == torch.float16:
        return inference_module.bias_relu_fp16(activations, bias)
    else:
        return inference_module.bias_relu_fp32(activations, bias)


@pytest.mark.inference
@pytest.mark.parametrize("batch", [1, 2])
@pytest.mark.parametrize("sequence", [1, 128, 255])
@pytest.mark.parametrize("channels", [512, 1232, 4096])
@pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
def test_bias_relu(batch, sequence, channels, dtype):
    activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device='cuda')
    bias_ds = torch.randn((channels), dtype=dtype, device='cuda')
    activations_ref = activations_ds.clone().detach()
    bias_ref = bias_ds.clone().detach()

    ds_out = run_bias_relu_ds(activations_ds, bias_ds)
    ref_out = run_bias_relu_reference(activations_ref, bias_ref)
    assert allclose(ds_out, ref_out)
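
As a usage note, the same kernel can be exercised directly outside pytest; a minimal sketch (assumes a CUDA device and that the DeepSpeed inference ops build on the host; the zero-bias check is only an illustration, not part of the test above):

import torch
from deepspeed.ops.op_builder import InferenceBuilder

# JIT-load the compiled inference kernels on demand.
module = InferenceBuilder().load()
x = torch.randn(1, 8, 512, dtype=torch.float16, device='cuda')
b = torch.zeros(512, dtype=torch.float16, device='cuda')
# With a zero bias the fused op should reduce to a plain ReLU.
y = module.bias_relu_fp16(x, b)
print(torch.allclose(y, torch.relu(x), rtol=3e-2, atol=2e-3))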