Unverified commit fd1d2c64, authored by Michael Wyatt, committed by GitHub

Reduce Unit Test Time (Part 2) (#3838)

* utilize shorter tests for MII

* use cached torch download

* rework zero++ unit tests

* formatting

---------
Co-authored-by: HeyangQin <heyangqin@microsoft.com>
Parent c973e157
@@ -23,7 +23,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
+          pip install -U --cache-dir /blob/torch_cache torch torchvision --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cu111
+          pip install -U --cache-dir /blob/torch_cache torch torchvision --extra-index-url https://download.pytorch.org/whl/cu111
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir /blob/torch_cache torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
+          pip install -U --cache-dir /blob/torch_cache torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir /blob/torch_cache torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip3 install -U --cache-dir /blob/torch_cache torch
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -54,4 +54,4 @@ jobs:
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
           cd tests
-          TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --forked --verbose -m "CPU or local" ./
+          TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --forked --verbose -m "deepspeed" ./
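For context, `pytest -m "deepspeed"` collects only the tests carrying that marker, which is how this MII run is shortened to a subset of the suite. A minimal illustration of marker-based selection; the module and test names below are hypothetical, not MII's actual tests:

import pytest

# Selected by `pytest -m "deepspeed"`. Register the marker in pytest.ini
# or setup.cfg to silence PytestUnknownMarkWarning.
@pytest.mark.deepspeed
def test_deepspeed_smoke():
    assert True

# Deselected by the `-m "deepspeed"` filter above.
def test_long_running_sweep():
    assert True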
@@ -20,7 +20,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir /blob/torch_cache torch==1.13.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu116
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -24,7 +24,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir /blob/torch_cache torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -20,7 +20,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+          pip install -U --cache-dir /blob/torch_cache torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -20,7 +20,7 @@ jobs:
       - name: Install pytorch
         run: |
-          pip install --no-cache-dir torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+          pip install -U --cache-dir /blob/torch_cache torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -25,7 +25,7 @@ jobs:
       - name: Install pytorch
         run: |
           # use the same pytorch version as transformers CI
-          pip install --no-cache-dir torch torchvision torchaudio -f https://download.pytorch.org/whl/torch_stable.html
+          pip install -U --cache-dir /blob/torch_cache torch torchvision torchaudio -f https://download.pytorch.org/whl/torch_stable.html
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import pytest
import deepspeed.comm as dist
from unit.common import DistributedTest
from unit.simple_model import random_dataloader
import deepspeed

import torch.nn as nn


class NNModel(nn.Module):

    def __init__(self, h_dim=1024, n_layers=2):
        super(NNModel, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(h_dim, h_dim) for i in range(n_layers)])
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, x, y):
        for layer in self.layers:
            x = layer(x)
        return self.cross_entropy_loss(x, y)


# Large sweep along hidden dim, num_layers of different sizes for qgZeRO.
@pytest.mark.parametrize("h_dim", [1024, 2000])
@pytest.mark.parametrize("n_layers", [8, 20])
class TesthpZeroConfigSweep(DistributedTest):
    world_size = 4

    def test(self, h_dim: int, n_layers: int) -> None:
        config_dict = {
            "train_micro_batch_size_per_gpu": 1,
            "zero_optimization": {
                "stage": 3,
                "reduce_scatter": True,
                "zero_quantized_gradients": True
            },
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 1.
                }
            },
            "fp16": {
                "enabled": True,
                "loss_scale": 1.,
            }
        }

        model = NNModel(h_dim, n_layers)
        model, _, _, _ = deepspeed.initialize(model=model, model_parameters=model.parameters(), config=config_dict)
        data_loader = random_dataloader(model=model, total_samples=20, hidden_dim=h_dim, device=model.device)
        dist.barrier()

        for n, batch in enumerate(data_loader):
            loss = model(batch[0], batch[1])
            model.backward(loss)
            model.step()
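The key switch in the config above is `zero_quantized_gradients`, which enables ZeRO++'s quantized gradient communication (qgZ): gradients are compressed to low precision before being reduce-scattered across ranks, cutting communication volume. A rough standalone sketch of the idea, a symmetric per-tensor int8 round-trip, not DeepSpeed's actual fused kernels:

import torch

def quantize_int8(t: torch.Tensor):
    # Symmetric per-tensor quantization: map [-amax, amax] onto [-127, 127].
    scale = t.abs().max().clamp(min=1e-8) / 127.0
    q = (t / scale).round().clamp(-127, 127).to(torch.int8)
    return q, scale

def dequantize_int8(q: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    return q.float() * scale

grad = torch.randn(1 << 20)
q, scale = quantize_int8(grad)
# The int8 payload is 4x smaller than fp32 (2x smaller than fp16) on the wire,
# at the cost of a bounded round-trip error.
error = (grad - dequantize_int8(q, scale)).abs().max()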
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import pytest
import deepspeed.comm as dist
from unit.common import DistributedTest
from unit.simple_model import random_dataloader
import deepspeed

import torch.nn as nn


class NNModel(nn.Module):

    def __init__(self, h_dim=1024, n_layers=2):
        super(NNModel, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(h_dim, h_dim) for i in range(n_layers)])
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, x, y):
        for layer in self.layers:
            x = layer(x)
        return self.cross_entropy_loss(x, y)


# Large sweep along hidden dim, num_layers of different sizes for qwZeRO.
@pytest.mark.parametrize("h_dim", [1024, 2048])
@pytest.mark.parametrize("n_layers", [8, 20])
class TesthpZeroConfigSweep(DistributedTest):
    world_size = 4

    def test(self, h_dim: int, n_layers: int) -> None:
        config_dict = {
            "train_micro_batch_size_per_gpu": 1,
            "zero_optimization": {
                "stage": 3,
                "reduce_scatter": True,
                "zero_quantized_weights": True
            },
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 1.
                }
            },
            "fp16": {
                "enabled": True,
                "loss_scale": 1.,
            }
        }

        model = NNModel(h_dim, n_layers)
        model, _, _, _ = deepspeed.initialize(model=model, model_parameters=model.parameters(), config=config_dict)
        data_loader = random_dataloader(model=model, total_samples=20, hidden_dim=h_dim, device=model.device)
        dist.barrier()

        for n, batch in enumerate(data_loader):
            loss = model(batch[0], batch[1])
            model.backward(loss)
            model.step()
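`zero_quantized_weights` is the weight-side counterpart (qwZ): the fp16 parameter shards that ZeRO-3 all-gathers before each forward and backward pass travel in quantized form. ZeRO++ describes this as block-based quantization, where each block carries its own scale to keep the error local. A sketch of that scheme, assuming the tensor's size is an exact multiple of the block size (real shards would be padded):

import torch

def blockwise_quantize(w: torch.Tensor, block: int = 256):
    # One scale per block bounds the error better than a single
    # per-tensor scale when magnitudes vary across the tensor.
    blocks = w.float().reshape(-1, block)
    scales = blocks.abs().amax(dim=1, keepdim=True).clamp(min=1e-8) / 127.0
    q = (blocks / scales).round().clamp(-127, 127).to(torch.int8)
    return q, scales

def blockwise_dequantize(q: torch.Tensor, scales: torch.Tensor, shape) -> torch.Tensor:
    return (q.float() * scales).reshape(shape).half()

w = torch.randn(1024, 1024).half()
q, s = blockwise_quantize(w)          # what would travel over the wire
w_hat = blockwise_dequantize(q, s, w.shape)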
@@ -40,12 +40,19 @@ def _assert_no_secondary_tensor_group(model: Module) -> None:
         assert param.ds_zero_param_process_group is None


 def _assert_secondary_tensor_size(model: Module) -> None:
     for _, param in model.named_parameters():
         assert param.ds_secondary_tensor is not None
         assert param.ds_secondary_tensor.size()[0] % param.ds_tensor.size()[0] == 0


 #Large sweep along hidden dim, num_layers, and zpg of different sizes
 #Assert when zpg=1 that secondary group and tensors are invalid
-@pytest.mark.parametrize("h_dim", [1024, 2000])
-@pytest.mark.parametrize("n_layers", [8, 20])
+@pytest.mark.sequential
+@pytest.mark.parametrize("h_dim", [1024])
+@pytest.mark.parametrize("n_layers", [4, 9])
 @pytest.mark.parametrize("zpg", [1, 2, 4])
-class TesthpZeroConfigSweep(DistributedTest):
+class TestZeroPPConfigSweep(DistributedTest):
     world_size = 4

     def test(self, h_dim: int, n_layers: int, zpg: int) -> None:
@@ -55,6 +62,8 @@ class TesthpZeroConfigSweep(DistributedTest):
                 "stage": 3,
                 "stage3_max_reuse_distance": 0,
                 "zero_hpz_partition_size": zpg,
+                "zero_quantized_weights": True,
+                "zero_quantized_gradients": True,
                 "contiguous_gradients": True,
                 "overlap_comm": True,
             },
@@ -78,53 +87,8 @@ class TesthpZeroConfigSweep(DistributedTest):
             _assert_no_secondary_tensor_group(model)

         for n, batch in enumerate(data_loader):
             if n == 0 and zpg != 1:
                 _assert_secondary_tensor_size(model)
             loss = model(batch[0], batch[1])
             model.backward(loss)
             model.step()
-
-
-def _assert_secondary_tensor_size(model: Module) -> None:
-    for _, param in model.named_parameters():
-        assert param.ds_secondary_tensor is not None
-        assert param.ds_secondary_tensor.size()[0] % param.ds_tensor.size()[0] == 0
-
-
-#Tests that secondary tensors are available and are of right sizes
-@pytest.mark.parametrize("h_dim", [1024, 4000])
-@pytest.mark.parametrize("n_layers", [8, 20])
-@pytest.mark.parametrize("zpg", [2, 4])
-class TestSecondaryTensorSize(DistributedTest):
-    world_size = 4
-
-    def test(self, h_dim: int, n_layers: int, zpg: int) -> None:
-        config_dict = {
-            "train_micro_batch_size_per_gpu": 1,
-            "zero_optimization": {
-                "stage": 3,
-                "stage3_max_reuse_distance": 0,
-                "zero_hpz_partition_size": zpg,
-                "contiguous_gradients": True,
-                "overlap_comm": True,
-            },
-            "optimizer": {
-                "type": "Adam",
-                "params": {
-                    "lr": 1.
-                }
-            },
-            "fp16": {
-                "enabled": True,
-                "loss_scale": 1.,
-            }
-        }
-
-        model = NNModel(h_dim, n_layers)
-        model, _, _, _ = deepspeed.initialize(model=model, model_parameters=model.parameters(), config=config_dict)
-        data_loader = random_dataloader(model=model, total_samples=4, hidden_dim=h_dim, device=model.device)
-        dist.barrier()
-
-        for n, batch in enumerate(data_loader):
-            loss = model(batch[0], batch[1])
-            model.backward(loss)
-            _assert_secondary_tensor_size(model)
-            if n == 0: break
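The surviving assertions encode hpZ's partitioning arithmetic: ZeRO-3 shards each parameter across all `world_size` ranks (`ds_tensor`), while hpZ keeps a secondary copy sharded only within the smaller `zero_hpz_partition_size` (`zpg`) subgroup (`ds_secondary_tensor`), so each secondary shard should be `world_size / zpg` times the primary one; with `zpg=1` the secondary group is disabled, which `_assert_no_secondary_tensor_group` checks. A back-of-the-envelope check of that size relation, assuming shards divide evenly (DeepSpeed pads in practice):

def shard_sizes(numel: int, world_size: int, zpg: int):
    primary = numel // world_size    # ds_tensor: one slice per rank
    secondary = numel // zpg         # ds_secondary_tensor: one slice per subgroup rank
    return primary, secondary

# world_size=4 as in the test above; zpg sweeps the subgroup sizes
for zpg in (2, 4):
    p, s = shard_sizes(numel=1024 * 1024, world_size=4, zpg=zpg)
    assert s % p == 0                # mirrors _assert_secondary_tensor_size
    print(f"zpg={zpg}: secondary shard is {s // p}x the primary shard")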