diff --git a/.github/workflows/nv-inference.yml b/.github/workflows/nv-inference.yml index 9879279ab1ef550fda473a80b139dee6df1a46b5..dac0462611d74af2b32239ea0745c87964a949af 100644 --- a/.github/workflows/nv-inference.yml +++ b/.github/workflows/nv-inference.yml @@ -60,4 +60,5 @@ jobs: if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi cd tests TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'seq_inference' unit/ --torch_ver="1.13" --cuda_ver="11.6" + TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'inference_ops' unit/ --torch_ver="1.13" --cuda_ver="11.6" TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --forked -n 4 --verbose -m 'inference' unit/ --torch_ver="1.13" --cuda_ver="11.6" diff --git a/tests/pytest.ini b/tests/pytest.ini index b7ee315be8018581b073df54de66ca8f524b4d52..08b666867b79f11a43abed1b802ae1cf125fb7e0 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,7 +1,8 @@ [pytest] -addopts = -m "not sequential and not nightly and not inference and not seq_inference" +addopts = -m "not sequential and not nightly and not inference and not seq_inference and not inference_ops" markers = sequential:Tests that need to be run sequentially inference:Inference model tests + inference_ops:Individual inference operator tests seq_inference:Inference model tests to run sequentially nightly:Tests that should be run nightly diff --git a/tests/unit/ops/quantizer/test_dequantize.py b/tests/unit/ops/quantizer/test_dequantize.py index bfa4bf3e85be922219265211a5fcc758cb717ad1..c211b3411a2912b5871d8e9a6ec9013602caeef8 100644 --- a/tests/unit/ops/quantizer/test_dequantize.py +++ b/tests/unit/ops/quantizer/test_dequantize.py @@ -55,7 +55,7 @@ def run_ref_dequantize(quantized_data, params, num_groups, q_bits, is_symmetric_ return (quantized_data * scales + offsets).to(torch.float16) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("num_groups", [1, 13, 512]) @pytest.mark.parametrize("num_elems", [8, diff --git a/tests/unit/ops/quantizer/test_fake_quantization.py b/tests/unit/ops/quantizer/test_fake_quantization.py index 431b1268bce514e20a82f21a560424b2da5061b5..10148f0272d22aaa500d112d0287e89902af4f64 100644 --- a/tests/unit/ops/quantizer/test_fake_quantization.py +++ b/tests/unit/ops/quantizer/test_fake_quantization.py @@ -34,7 +34,7 @@ def run_quant_dequant(inputs, groups, bits): return quantizer_cuda_module.ds_quantize_fp16(inputs, groups, bits) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("tensor_shape", [(16, 4096), (128, 256)]) # Test with two tensor shapes as (16, 4096) and (128, 256). @pytest.mark.parametrize("groups", [1, 16]) diff --git a/tests/unit/ops/quantizer/test_quantize.py b/tests/unit/ops/quantizer/test_quantize.py index 899619e15cd461be8b252a263842e75a96f880f1..ebcbb4c11946017a68f7b8be011c52a40f4ef098 100644 --- a/tests/unit/ops/quantizer/test_quantize.py +++ b/tests/unit/ops/quantizer/test_quantize.py @@ -99,7 +99,7 @@ def run_float_quantize(q_bits, is_symmetric_quant, activations_ref, num_groups): return data_i8, params -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("num_groups", [1, 13, 512]) @pytest.mark.parametrize("num_elems", [8, @@ -198,7 +198,7 @@ def run_integer_quantize(q_bits, activations_ref, num_groups): return data_i8, max_abs_activations_ref.to(torch.int32) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("num_groups", [1, 2, 4, 8, 16, 32, 64, 512]) @pytest.mark.parametrize("num_elems", [4096, 8192, 12288, 16384]) @pytest.mark.parametrize("q_bits", [4, 8]) diff --git a/tests/unit/ops/spatial/test_nhwc_bias_add.py b/tests/unit/ops/spatial/test_nhwc_bias_add.py index d69a2d7bb549a96d8b601a75f489c01ddbdd0118..c863d0f6f1d95591507eea0d5e6ba65125eba70e 100644 --- a/tests/unit/ops/spatial/test_nhwc_bias_add.py +++ b/tests/unit/ops/spatial/test_nhwc_bias_add.py @@ -35,7 +35,7 @@ channels_list = [ ] -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2, 10]) @pytest.mark.parametrize("image_size", [16, 32, 64]) @pytest.mark.parametrize("channels", channels_list) @@ -58,7 +58,7 @@ def ref_bias_add_add(activations, bias, other): return (activations + bias.reshape(1, -1, 1, 1)) + other -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2, 10]) @pytest.mark.parametrize("image_size", [16, 32, 64]) @pytest.mark.parametrize("channels", channels_list) @@ -93,7 +93,7 @@ def ref_bias_add_bias_add(activations, bias, other, other_bias): 1)) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2, 10]) @pytest.mark.parametrize("image_size", [16, 32, 64]) @pytest.mark.parametrize("channels", channels_list) diff --git a/tests/unit/ops/transformer/inference/test_bias_add.py b/tests/unit/ops/transformer/inference/test_bias_add.py index 2077390aabfcbf2c9ba13b66c9c213e7c56b1163..dbdb698b5707892d79cbe5cedb7f587d22c4c17a 100644 --- a/tests/unit/ops/transformer/inference/test_bias_add.py +++ b/tests/unit/ops/transformer/inference/test_bias_add.py @@ -31,7 +31,7 @@ def run_bias_add_ds(activations, bias): return inference_module.bias_add_fp32(activations, bias) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("sequence", [1, 128, 255]) @pytest.mark.parametrize("channels", [512, 1232, 4096]) diff --git a/tests/unit/ops/transformer/inference/test_bias_geglu.py b/tests/unit/ops/transformer/inference/test_bias_geglu.py index 207734379d673ad2802abf71c1bd809c468051e5..a9f221488a4c286fd53c0326bcf4d218d1fb67d0 100644 --- a/tests/unit/ops/transformer/inference/test_bias_geglu.py +++ b/tests/unit/ops/transformer/inference/test_bias_geglu.py @@ -37,7 +37,7 @@ def run_bias_geglu_ds(activation, bias): return inference_module.bias_geglu(activation, bias) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("sequence", [1, 128, 255]) @pytest.mark.parametrize("channels", [512, 1232, 4096]) diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py index bf0b184fb5fe35952e23eacda6406665dcdfd053..6d3492482a18afd91a2e0da1bc627192d8530bbd 100644 --- a/tests/unit/ops/transformer/inference/test_bias_gelu.py +++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py @@ -48,7 +48,7 @@ def run_bias_gelu_ds(activations, bias): return inference_module.bias_gelu_fp32(activations, bias) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("sequence", [1, 128, 255]) @pytest.mark.parametrize("channels", [512, 1232, 4096]) diff --git a/tests/unit/ops/transformer/inference/test_bias_relu.py b/tests/unit/ops/transformer/inference/test_bias_relu.py index c62b4b29bebda7909d6d1f404753ae69118cb4ab..932b02c01bfa652b08359546c0145bc76ee09215 100644 --- a/tests/unit/ops/transformer/inference/test_bias_relu.py +++ b/tests/unit/ops/transformer/inference/test_bias_relu.py @@ -37,7 +37,7 @@ def run_bias_relu_ds(activations, bias): return inference_module.bias_relu_fp32(activations, bias) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("sequence", [1, 128, 255]) @pytest.mark.parametrize("channels", [512, 1232, 4096]) diff --git a/tests/unit/ops/transformer/inference/test_layer_norm.py b/tests/unit/ops/transformer/inference/test_layer_norm.py index a005e0d7f9f0113d29d3ad6c93ffd994032b671f..21da7ba43462a12372841789702359b2de773397 100644 --- a/tests/unit/ops/transformer/inference/test_layer_norm.py +++ b/tests/unit/ops/transformer/inference/test_layer_norm.py @@ -38,7 +38,7 @@ def ds_implementation(vals, gamma, beta, epsilon): return inference_module.layer_norm(vals, gamma, beta, epsilon) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 32]) @pytest.mark.parametrize("seq_len", [1, 128]) @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432]) @@ -79,7 +79,7 @@ def residual_ds_implementation(vals, bias, res, gamma, beta, epsilon): return inference_module._layer_norm_residual(vals, bias, res, gamma, beta, epsilon) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 32]) @pytest.mark.parametrize("seq_len", [1, 128]) @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432]) @@ -147,7 +147,7 @@ def residual_store_ds_implementation(vals, bias, res, gamma, beta, epsilon): epsilon) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 32]) @pytest.mark.parametrize("seq_len", [1, 128]) @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432]) diff --git a/tests/unit/ops/transformer/inference/test_moe_res_matmult.py b/tests/unit/ops/transformer/inference/test_moe_res_matmult.py index 8b1b1cb161682019b78e2312df55ad5f5c31ec5f..defdc99ac5081ec72b848f40b4674f1f51901abe 100644 --- a/tests/unit/ops/transformer/inference/test_moe_res_matmult.py +++ b/tests/unit/ops/transformer/inference/test_moe_res_matmult.py @@ -32,7 +32,7 @@ def run_moe_res_matmul_ds(residual, coef, output): return inference_module.moe_res_matmul(residual, coef_t, output) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("hidden_dim", [16, 64]) @pytest.mark.parametrize("c", [1, 4]) @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) diff --git a/tests/unit/ops/transformer/inference/test_residual_add.py b/tests/unit/ops/transformer/inference/test_residual_add.py index 336008f5a1e80bcd07a6f02d15e6d044aa7bcc2e..e5d4f08c50f4a69f29e32859a860efded55a6e0c 100644 --- a/tests/unit/ops/transformer/inference/test_residual_add.py +++ b/tests/unit/ops/transformer/inference/test_residual_add.py @@ -77,7 +77,7 @@ def run_residual_add_reference(hidden_state, mp_size) -@pytest.mark.inference +@pytest.mark.inference_ops @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("sequence", [1, 128, 255]) @pytest.mark.parametrize("hidden_dim", [512, 1232, 4096])