# Azure Pipelines CI definition.
#
# Two jobs are defined:
#   * DeepSpeed_Tests     - creates a conda environment for each matrix entry,
#                           installs DeepSpeed into it and runs the unit tests
#                           on the 'DS_testing' agent pool.
#   * Code_Quality_Checks - runs the pre-commit hooks and pylint on a hosted
#                           'ubuntu-latest' image.

# Commits that only touch the excluded paths do not trigger a run.
trigger:
  paths:
    exclude:
      - docs
      - azure

jobs:
  - job: DeepSpeed_Tests
    timeoutInMinutes: 360
    pool:
      name: 'DS_testing'

    # One job instance is generated per matrix entry; the keys of each entry
    # become variables ($(python.version), $(cuda.version), ...) used below.
    # Version numbers are quoted so they stay strings (e.g. '1.2', not a float).
    strategy:
      matrix:
        PyTorch12-CUDA100:
          python.version: '3.6'
          cuda.version: '10.0'
          pytorch.version: '1.2'
          torchvision.version: '0.4.0'
          runmodeltests: false
        # Disabled configurations, kept for reference:
        # PyTorch15-CUDA101:
        #   python.version: '3.7'
        #   cuda.version: '10.1'
        #   pytorch.version: '1.5.0+cu101'
        #   torchvision.version: '0.6.0+cu101'
        #   runmodeltests: true
        # PyTorch15-CUDA102:
        #   python.version: '3.7'
        #   cuda.version: '10.2'
        #   pytorch.version: '1.5'
        #   torchvision.version: '0.6.1'
        #   runmodeltests: true

    variables:
      # Environment name is unique per matrix entry so configurations sharing
      # an agent do not overwrite each other's conda environment.
      conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'

    steps:
      # Unfortunately nvidia's nvcc_linux-64=<version> seems to install 10.1
      # regardless of the version requested?
      # Most of this complexity is a workaround to get the compiler toolchain
      # to match the cudatoolkit runtime.
      # `--force` recreates the environment if one with the same name exists.
      - script: |
          conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
          source activate $(conda_env)
          conda install -q --yes conda
          conda install -q --yes pip
          conda install -q --yes gxx_linux-64
          echo "PATH=$PATH, LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
        displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'

      # Manually install torch/torchvision first to enforce versioning.
      # The '.[dev]' requirement is quoted so the shell cannot treat the
      # bracket expression as a glob pattern (it would otherwise match files
      # named .d, .e or .v in the working directory).
      - script: |
          source activate $(conda_env)
          pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
          pip install '.[dev]'
          ds_report
        displayName: 'Install DeepSpeed'

      # Diagnostic output only: records the interpreter, compiler and package
      # versions that the test step below will run against.
      - script: |
          source activate $(conda_env)
          which python
          python --version
          which nvcc
          nvcc --version
          which deepspeed
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
          python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
        displayName: 'Show environment'

      # Any ./torch-extensions directory left over from an earlier run is
      # removed before pytest is started with TORCH_EXTENSIONS_DIR pointing
      # at that path. `-x` stops the run at the first failing test.
      - script: |
          source activate $(conda_env)
          if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --durations=0 --forked --verbose -x tests/unit/
        displayName: 'Unit tests'

      # Model tests are currently disabled; kept for reference.
      # - script: |
      #     source activate $(conda_env)
      #     ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
      #     pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
      #     cd tests/model/
      #     rm -rf BingBertSquad/baseline
      #     rm -rf Megatron_GPT2/baseline
      #     pytest --durations=0 -s run_sanity_check.py
      #   condition: and(succeeded(), eq(variables['runmodeltests'], true))
      #   displayName: 'Model tests'

      # BingBertSquad logs
      # - task: PublishPipelineArtifact@1
      #   inputs:
      #     targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
      #     artifactName: BingBertSquad_logs
      #   displayName: 'BingBertSquad log uploads'
      #   condition: eq(variables['runmodeltests'], true)

  - job: Code_Quality_Checks
    pool:
      vmImage: 'ubuntu-latest'

    variables:
      conda_env: 'ds_codetest'

    steps:
      # The `##vso[task.prependpath]` logging command prepends $CONDA/bin to
      # PATH for the steps that follow, making `conda` available to them.
      - bash: echo "##vso[task.prependpath]$CONDA/bin"
        displayName: Add conda to PATH

      - script: |
          conda create --force --yes -n $(conda_env) python=3.7
          source activate $(conda_env)
        displayName: 'Create code test environment'

      # Runs every configured pre-commit hook against the whole repository.
      - script: |
          source activate $(conda_env)
          pip install pre-commit
          pre-commit run --all-files
        displayName: 'Formatting checks'

      # `--exit-zero` makes pylint return success regardless of the messages
      # it emits, so this step reports lint findings without failing the job.
      - script: |
          source activate $(conda_env)
          pip install pylint
          pylint --exit-zero deepspeed/
        displayName: 'Code linter'