diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index a791327efbf287d0ffc5ccd778356fbeee05a921..7c7b54f0da96f5c964c5775c6a6d2ac1e4f072db 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -384,10 +384,7 @@ class TestMPSize(DistributedTest):
 
 
 @pytest.mark.seq_inference
-@pytest.mark.parametrize("model_w_task",
-                         [("EleutherAI/gpt-j-6B",
-                           "text-generation")],
-                         ids=["gpt-j"])
+@pytest.mark.parametrize("model_w_task", [("EleutherAI/gpt-j-6B", "text-generation")], ids=["gpt-j"])
 class TestLowCpuMemUsage(DistributedTest):
     world_size = 1
 
@@ -411,11 +408,7 @@ class TestLowCpuMemUsage(DistributedTest):
 
         # We have to load these large models on CPU with pipeline because not
        # enough GPU memory
-        pipe = pipeline(task,
-                        model=model,
-                        tokenizer=tokenizer,
-                        device=-1,
-                        framework="pt")
+        pipe = pipeline(task, model=model, tokenizer=tokenizer, device=-1, framework="pt")
         bs_output = pipe(query, **inf_kwargs)
 
         pipe.model = deepspeed.init_inference(pipe.model,