diff --git a/.github/workflows/nv-inference.yml b/.github/workflows/nv-inference.yml
index 9879279ab1ef550fda473a80b139dee6df1a46b5..dac0462611d74af2b32239ea0745c87964a949af 100644
--- a/.github/workflows/nv-inference.yml
+++ b/.github/workflows/nv-inference.yml
@@ -60,4 +60,5 @@ jobs:
           if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
           cd tests
           TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'seq_inference' unit/ --torch_ver="1.13" --cuda_ver="11.6"
+          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'inference_ops' unit/ --torch_ver="1.13" --cuda_ver="11.6"
           TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --forked -n 4 --verbose -m 'inference' unit/ --torch_ver="1.13" --cuda_ver="11.6"
diff --git a/tests/pytest.ini b/tests/pytest.ini
index b7ee315be8018581b073df54de66ca8f524b4d52..08b666867b79f11a43abed1b802ae1cf125fb7e0 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,7 +1,8 @@
 [pytest]
-addopts = -m "not sequential and not nightly and not inference and not seq_inference"
+addopts = -m "not sequential and not nightly and not inference and not seq_inference and not inference_ops"
 markers =
     sequential:Tests that need to be run sequentially
     inference:Inference model tests
+    inference_ops:Individual inference operator tests
     seq_inference:Inference model tests to run sequentially
     nightly:Tests that should be run nightly
diff --git a/tests/unit/ops/quantizer/test_dequantize.py b/tests/unit/ops/quantizer/test_dequantize.py
index bfa4bf3e85be922219265211a5fcc758cb717ad1..c211b3411a2912b5871d8e9a6ec9013602caeef8 100644
--- a/tests/unit/ops/quantizer/test_dequantize.py
+++ b/tests/unit/ops/quantizer/test_dequantize.py
@@ -55,7 +55,7 @@ def run_ref_dequantize(quantized_data, params, num_groups, q_bits, is_symmetric_
         return (quantized_data * scales + offsets).to(torch.float16)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 13, 512])
 @pytest.mark.parametrize("num_elems",
                          [8,
diff --git a/tests/unit/ops/quantizer/test_fake_quantization.py b/tests/unit/ops/quantizer/test_fake_quantization.py
index 431b1268bce514e20a82f21a560424b2da5061b5..10148f0272d22aaa500d112d0287e89902af4f64 100644
--- a/tests/unit/ops/quantizer/test_fake_quantization.py
+++ b/tests/unit/ops/quantizer/test_fake_quantization.py
@@ -34,7 +34,7 @@ def run_quant_dequant(inputs, groups, bits):
     return quantizer_cuda_module.ds_quantize_fp16(inputs, groups, bits)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("tensor_shape", [(16, 4096), (128, 256)])
 # Test with two tensor shapes as (16, 4096) and (128, 256).
 @pytest.mark.parametrize("groups", [1, 16])
diff --git a/tests/unit/ops/quantizer/test_quantize.py b/tests/unit/ops/quantizer/test_quantize.py
index 899619e15cd461be8b252a263842e75a96f880f1..ebcbb4c11946017a68f7b8be011c52a40f4ef098 100644
--- a/tests/unit/ops/quantizer/test_quantize.py
+++ b/tests/unit/ops/quantizer/test_quantize.py
@@ -99,7 +99,7 @@ def run_float_quantize(q_bits, is_symmetric_quant, activations_ref, num_groups):
     return data_i8, params
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 13, 512])
 @pytest.mark.parametrize("num_elems",
                          [8,
@@ -198,7 +198,7 @@ def run_integer_quantize(q_bits, activations_ref, num_groups):
     return data_i8, max_abs_activations_ref.to(torch.int32)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("num_groups", [1, 2, 4, 8, 16, 32, 64, 512])
 @pytest.mark.parametrize("num_elems", [4096, 8192, 12288, 16384])
 @pytest.mark.parametrize("q_bits", [4, 8])
diff --git a/tests/unit/ops/spatial/test_nhwc_bias_add.py b/tests/unit/ops/spatial/test_nhwc_bias_add.py
index d69a2d7bb549a96d8b601a75f489c01ddbdd0118..c863d0f6f1d95591507eea0d5e6ba65125eba70e 100644
--- a/tests/unit/ops/spatial/test_nhwc_bias_add.py
+++ b/tests/unit/ops/spatial/test_nhwc_bias_add.py
@@ -35,7 +35,7 @@ channels_list = [
 ]
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)
@@ -58,7 +58,7 @@ def ref_bias_add_add(activations, bias, other):
     return (activations + bias.reshape(1, -1, 1, 1)) + other
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)
@@ -93,7 +93,7 @@ def ref_bias_add_bias_add(activations, bias, other, other_bias):
                                                                          1))
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2, 10])
 @pytest.mark.parametrize("image_size", [16, 32, 64])
 @pytest.mark.parametrize("channels", channels_list)
diff --git a/tests/unit/ops/transformer/inference/test_bias_add.py b/tests/unit/ops/transformer/inference/test_bias_add.py
index 2077390aabfcbf2c9ba13b66c9c213e7c56b1163..dbdb698b5707892d79cbe5cedb7f587d22c4c17a 100644
--- a/tests/unit/ops/transformer/inference/test_bias_add.py
+++ b/tests/unit/ops/transformer/inference/test_bias_add.py
@@ -31,7 +31,7 @@ def run_bias_add_ds(activations, bias):
         return inference_module.bias_add_fp32(activations, bias)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
diff --git a/tests/unit/ops/transformer/inference/test_bias_geglu.py b/tests/unit/ops/transformer/inference/test_bias_geglu.py
index 207734379d673ad2802abf71c1bd809c468051e5..a9f221488a4c286fd53c0326bcf4d218d1fb67d0 100644
--- a/tests/unit/ops/transformer/inference/test_bias_geglu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_geglu.py
@@ -37,7 +37,7 @@ def run_bias_geglu_ds(activation, bias):
     return inference_module.bias_geglu(activation, bias)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index bf0b184fb5fe35952e23eacda6406665dcdfd053..6d3492482a18afd91a2e0da1bc627192d8530bbd 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -48,7 +48,7 @@ def run_bias_gelu_ds(activations, bias):
         return inference_module.bias_gelu_fp32(activations, bias)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
diff --git a/tests/unit/ops/transformer/inference/test_bias_relu.py b/tests/unit/ops/transformer/inference/test_bias_relu.py
index c62b4b29bebda7909d6d1f404753ae69118cb4ab..932b02c01bfa652b08359546c0145bc76ee09215 100644
--- a/tests/unit/ops/transformer/inference/test_bias_relu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_relu.py
@@ -37,7 +37,7 @@ def run_bias_relu_ds(activations, bias):
         return inference_module.bias_relu_fp32(activations, bias)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
diff --git a/tests/unit/ops/transformer/inference/test_layer_norm.py b/tests/unit/ops/transformer/inference/test_layer_norm.py
index a005e0d7f9f0113d29d3ad6c93ffd994032b671f..21da7ba43462a12372841789702359b2de773397 100644
--- a/tests/unit/ops/transformer/inference/test_layer_norm.py
+++ b/tests/unit/ops/transformer/inference/test_layer_norm.py
@@ -38,7 +38,7 @@ def ds_implementation(vals, gamma, beta, epsilon):
     return inference_module.layer_norm(vals, gamma, beta, epsilon)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])
@@ -79,7 +79,7 @@ def residual_ds_implementation(vals, bias, res, gamma, beta, epsilon):
     return inference_module._layer_norm_residual(vals, bias, res, gamma, beta, epsilon)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])
@@ -147,7 +147,7 @@ def residual_store_ds_implementation(vals, bias, res, gamma, beta, epsilon):
                                                       epsilon)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 32])
 @pytest.mark.parametrize("seq_len", [1, 128])
 @pytest.mark.parametrize("channels", [384, 512, 768, 1024, 2048, 8192, 14432])
diff --git a/tests/unit/ops/transformer/inference/test_moe_res_matmult.py b/tests/unit/ops/transformer/inference/test_moe_res_matmult.py
index 8b1b1cb161682019b78e2312df55ad5f5c31ec5f..defdc99ac5081ec72b848f40b4674f1f51901abe 100644
--- a/tests/unit/ops/transformer/inference/test_moe_res_matmult.py
+++ b/tests/unit/ops/transformer/inference/test_moe_res_matmult.py
@@ -32,7 +32,7 @@ def run_moe_res_matmul_ds(residual, coef, output):
     return inference_module.moe_res_matmul(residual, coef_t, output)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("hidden_dim", [16, 64])
 @pytest.mark.parametrize("c", [1, 4])
 @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
diff --git a/tests/unit/ops/transformer/inference/test_residual_add.py b/tests/unit/ops/transformer/inference/test_residual_add.py
index 336008f5a1e80bcd07a6f02d15e6d044aa7bcc2e..e5d4f08c50f4a69f29e32859a860efded55a6e0c 100644
--- a/tests/unit/ops/transformer/inference/test_residual_add.py
+++ b/tests/unit/ops/transformer/inference/test_residual_add.py
@@ -77,7 +77,7 @@ def run_residual_add_reference(hidden_state,
                                      mp_size)
 
 
-@pytest.mark.inference
+@pytest.mark.inference_ops
 @pytest.mark.parametrize("batch", [1, 2])
 @pytest.mark.parametrize("sequence", [1, 128, 255])
 @pytest.mark.parametrize("hidden_dim", [512, 1232, 4096])