From b2e118bf4fd93a31bdc173442b7e74b4ead9164f Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Thu, 13 Jul 2023 11:50:17 -0700
Subject: [PATCH] Update H100 workflow to open an issue if nightly CI fails

---
Note: the "filename" input of the create-an-issue step must be the path of
the issue template added by this patch (ci_failure_template.md). The step
reads TESTNAME from its env block, which the template interpolates into the
issue title and body via {{ env.TESTNAME }}.

 .github/ISSUE_TEMPLATE/ci_failure_template.md | 10 ++++++++++
 .github/workflows/nv-h100.yml                 | 13 +++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/ci_failure_template.md

diff --git a/.github/ISSUE_TEMPLATE/ci_failure_template.md b/.github/ISSUE_TEMPLATE/ci_failure_template.md
new file mode 100644
index 00000000..27f07581
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/ci_failure_template.md
@@ -0,0 +1,10 @@
+---
+name: CI failure report
+about: Report a DeepSpeed CI failure
+title: "{{ env.TESTNAME }} CI test failure"
+labels: ci-failure
+assignees: ''
+
+---
+
+The Nightly CI for {{ env.TESTNAME }} failed.
diff --git a/.github/workflows/nv-h100.yml b/.github/workflows/nv-h100.yml
index 33f248c4..b19d8eca 100644
--- a/.github/workflows/nv-h100.yml
+++ b/.github/workflows/nv-h100.yml
@@ -8,6 +8,10 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true

+permissions:
+  contents: read
+  issues: write
+
 jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, h100]
@@ -49,3 +53,12 @@ jobs:
           cd tests
           python -m pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.0" --cuda_ver="12"
           python -m pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.0" --cuda_ver="12"
+
+      - name: Open GitHub issue if nightly CI fails
+        if: failure()
+        uses: JasonEtco/create-an-issue@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TESTNAME: nv-h100
+        with:
+          filename: .github/ISSUE_TEMPLATE/ci_failure_template.md
-- 
GitLab