Migrate ops tests to new inference_ops marker (#2599)

* Migrate ops tests to new inference_ops marker
* Disable by default
* Add missing test cases
* Reorder so that the inference_ops tests run (and fail) first

Migrate ops tests to new inference_ops marker (#2599)
* Migrate ops tests to new inference_ops marker
* Disable by default
* Add missing test cases
* Reorder so that the inference_ops tests run (and fail) first
7425a365 · Connor Holmes · GitHub · acde873c · 7425a365 · 7425a365
13 changed files
--- a/.github/workflows/nv-inference.yml
+++ b/.github/workflows/nv-inference.yml
@@ -60,4 +60,5 @@ jobs:
          if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
          cd tests
          TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'seq_inference' unit/ --torch_ver="1.13" --cuda_ver="11.6"
+          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'inference_ops' unit/ --torch_ver="1.13" --cuda_ver="11.6"
          TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --forked -n 4 --verbose -m 'inference' unit/ --torch_ver="1.13" --cuda_ver="11.6"
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
 [pytest]
-addopts = -m "not sequential and not nightly and not inference and not seq_inference"
+addopts = -m "not sequential and not nightly and not inference and not seq_inference and not inference_ops"
 markers =
    sequential:Tests that need to be run sequentially
    inference:Inference model tests
+    inference_ops:Individual inference operator tests
    seq_inference:Inference model tests to run sequentially
    nightly:Tests that should be run nightly
--- a/tests/unit/ops/quantizer/test_dequantize.py
+++ b/tests/unit/ops/quantizer/test_dequantize.py
@@ -55,7 +55,7 @@ def run_ref_dequantize(quantized_data, params, num_groups, q_bits, is_symmetric_
        return (quantized_data * scales + offsets).to(torch.float16)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 13, 512])
 @pytest.mark.parametrize("num_elems",
                         [8,

--- a/tests/unit/ops/quantizer/test_fake_quantization.py
+++ b/tests/unit/ops/quantizer/test_fake_quantization.py
@@ -34,7 +34,7 @@ def run_quant_dequant(inputs, groups, bits):
    return quantizer_cuda_module.ds_quantize_fp16(inputs, groups, bits)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("tensor_shape", [(16, 4096), (128, 256)])
 # Test with two tensor shapes as (16, 4096) and (128, 256).
 @pytest.mark.parametrize("groups", [1, 16])

--- a/tests/unit/ops/quantizer/test_quantize.py
+++ b/tests/unit/ops/quantizer/test_quantize.py
@@ -99,7 +99,7 @@ def run_float_quantize(q_bits, is_symmetric_quant, activations_ref, num_groups):
    return data_i8, params


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 13, 512])
 @pytest.mark.parametrize("num_elems",
                         [8,
@@ -198,7 +198,7 @@ def run_integer_quantize(q_bits, activations_ref, num_groups):
    return data_i8, max_abs_activations_ref.to(torch.int32)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 2, 4, 8, 16, 32, 64, 512])
 @pytest.mark.parametrize("num_elems", [4096, 8192, 12288, 16384])
 @pytest.mark.parametrize("q_bits", [4, 8])

--- a/tests/unit/ops/spatial/test_nhwc_bias_add.py
+++ b/tests/unit/ops/spatial/test_nhwc_bias_add.py
@@ -35,7 +35,7 @@ channels_list = [
 ]


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)
@@ -58,7 +58,7 @@ def ref_bias_add_add(activations, bias, other):
    return (activations + bias.reshape(1, -1, 1, 1)) + other


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)
@@ -93,7 +93,7 @@ def ref_bias_add_bias_add(activations, bias, other, other_bias):
                                                                         1))


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)

--- a/tests/unit/ops/transformer/inference/test_bias_add.py
+++ b/tests/unit/ops/transformer/inference/test_bias_add.py
@@ -31,7 +31,7 @@ def run_bias_add_ds(activations, bias):
        return inference_module.bias_add_fp32(activations, bias)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])

--- a/tests/unit/ops/transformer/inference/test_bias_geglu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_geglu.py
@@ -37,7 +37,7 @@ def run_bias_geglu_ds(activation, bias):
    return inference_module.bias_geglu(activation, bias)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])

--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -48,7 +48,7 @@ def run_bias_gelu_ds(activations, bias):
        return inference_module.bias_gelu_fp32(activations, bias)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])

--- a/tests/unit/ops/transformer/inference/test_bias_relu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_relu.py
@@ -37,7 +37,7 @@ def run_bias_relu_ds(activations, bias):
        return inference_module.bias_relu_fp32(activations, bias)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])

--- a/tests/unit/ops/transformer/inference/test_layer_norm.py
+++ b/tests/unit/ops/transformer/inference/test_layer_norm.py
@@ -38,7 +38,7 @@ def ds_implementation(vals, gamma, beta, epsilon):
    return inference_module.layer_norm(vals, gamma, beta, epsilon)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])
@@ -79,7 +79,7 @@ def residual_ds_implementation(vals, bias, res, gamma, beta, epsilon):
    return inference_module._layer_norm_residual(vals, bias, res, gamma, beta, epsilon)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])
@@ -147,7 +147,7 @@ def residual_store_ds_implementation(vals, bias, res, gamma, beta, epsilon):
                                                      epsilon)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])

--- a/tests/unit/ops/transformer/inference/test_moe_res_matmult.py
+++ b/tests/unit/ops/transformer/inference/test_moe_res_matmult.py
@@ -32,7 +32,7 @@ def run_moe_res_matmul_ds(residual, coef, output):
    return inference_module.moe_res_matmul(residual, coef_t, output)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("hidden_dim", [16, 64])
 @pytest.mark.parametrize("c", [1, 4])
 @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])

--- a/tests/unit/ops/transformer/inference/test_residual_add.py
+++ b/tests/unit/ops/transformer/inference/test_residual_add.py
@@ -77,7 +77,7 @@ def run_residual_add_reference(hidden_state,
                                     mp_size)


-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("hidden_dim", [512, 1232, 4096])