提交 25757239 编写于 作者: L Logan Adams

Add all nightly/switch envvar name

上级 3942858e
--- ---
name: CI failure report name: CI failure report
about: Report a DeepSpeed CI failure about: Report a DeepSpeed CI failure
title: "{{ env.TESTNAME }} CI test failure" title: "{{ env.GITHUB_ACTION }} CI test failure"
labels: ci-failure labels: ci-failure
assignees: '' assignees: ''
......
name: nv-h100 name: nv-h100
on: on:
pull_request:
branches:
'**'
paths-ignore:
- 'docs/**'
- 'blogs/**'
schedule: schedule:
- cron: "0 0 * * *" - cron: "0 0 * * *"
...@@ -65,6 +59,5 @@ jobs: ...@@ -65,6 +59,5 @@ jobs:
uses: JasonEtco/create-an-issue@v2 uses: JasonEtco/create-an-issue@v2
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TESTNAME: nv-h100
with: with:
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
...@@ -8,6 +8,10 @@ concurrency: ...@@ -8,6 +8,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
permissions:
contents: read
issues: write
jobs: jobs:
unit-tests: unit-tests:
runs-on: [self-hosted, nvidia, cu116, v100] runs-on: [self-hosted, nvidia, cu116, v100]
...@@ -47,3 +51,11 @@ jobs: ...@@ -47,3 +51,11 @@ jobs:
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests cd tests
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="1.13" --cuda_ver="11.6" pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="1.13" --cuda_ver="11.6"
- name: Open GitHub issue if nightly CI fails
if: failure()
uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
...@@ -8,6 +8,10 @@ concurrency: ...@@ -8,6 +8,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
permissions:
contents: read
issues: write
jobs: jobs:
unit-tests: unit-tests:
runs-on: [self-hosted, nvidia, cu116, v100] runs-on: [self-hosted, nvidia, cu116, v100]
...@@ -48,3 +52,11 @@ jobs: ...@@ -48,3 +52,11 @@ jobs:
cd tests cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/ pytest $PYTEST_OPTS --forked -n 4 unit/
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ pytest $PYTEST_OPTS --forked -m 'sequential' unit/
- name: Open GitHub issue if nightly CI fails
if: failure()
uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
...@@ -8,6 +8,10 @@ concurrency: ...@@ -8,6 +8,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
permissions:
contents: read
issues: write
jobs: jobs:
unit-tests: unit-tests:
runs-on: [self-hosted, nvidia, cu111, p40] runs-on: [self-hosted, nvidia, cu111, p40]
...@@ -47,3 +51,11 @@ jobs: ...@@ -47,3 +51,11 @@ jobs:
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11.1" pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11.1"
- name: Open GitHub issue if nightly CI fails
if: failure()
uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
...@@ -8,6 +8,10 @@ concurrency: ...@@ -8,6 +8,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
permissions:
contents: read
issues: write
jobs: jobs:
unit-tests: unit-tests:
runs-on: [self-hosted, nvidia, cu111, v100] runs-on: [self-hosted, nvidia, cu111, v100]
...@@ -48,3 +52,11 @@ jobs: ...@@ -48,3 +52,11 @@ jobs:
cd tests cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11" pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11"
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="1.9" --cuda_ver="11" pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="1.9" --cuda_ver="11"
- name: Open GitHub issue if nightly CI fails
if: failure()
uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册