...
 
Commits (17)
    https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/9c0e4ce6cc5bda4a7e8a11d6c96c469a9a6d7e8a test 2021-10-25T18:55:54+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/1df9cebb996114b0e3e3ea0b904ec8d1c585b5b7 test 2021-10-25T18:58:26+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/775a26361aa6861ee508a74762831899d2aa2380 test 2021-10-25T20:43:06+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/4da6c124a8b191e9d43a40bf47592b5460f4f206 test 2021-10-25T20:48:29+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/48e3e907871734d6c0dafdae006bd3bafd3adadd test 2021-10-25T20:53:05+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/801ceb552e61c4f6fad67b7fe06f6a16ffbc120d test 2021-10-25T20:54:18+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/428b30ac4d07f80651240b69ead771727ce7a4d0 test 2021-10-25T20:55:57+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/776b6513f1087d496a163d1c97c6098ede1e9ce2 test 2021-10-25T21:04:49+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/897f976947d1c0a30de612811eae3aacb5b511f5 test 2021-10-25T21:08:33+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/a0c015b32dd26b4dfd0fb72cb9fd97b207663fb2 test 2021-10-25T21:11:02+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/fc47d1b025c3be6d026f1a059d3ef6bca84f0289 test 2021-10-25T21:17:15+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/238f649371ccefd7643b01e11434d5352328d9ee test 2021-10-25T21:28:55+08:00 ShawnXuan xiexuanx2@gmail.com 
https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/c9d9e8dff9b8cd66c140406d8e210b4621b16164 test 2021-10-25T21:37:00+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/6c76e33d034733a902fa483e3ff7592f7a95e81a test 2021-10-25T21:46:54+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/e8a6aaf4def5b1a0960acd984f8f1b4b16797a5d test 2021-10-25T22:09:03+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/358c0b86835b2921bd4f3870098c6edeb62f97fe test 2021-10-25T22:12:08+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/843012a9afd9cf597fd108681562eca8f09aff31 test 2021-10-25T22:14:11+08:00 ShawnXuan xiexuanx2@gmail.com
......@@ -38,8 +38,39 @@ jobs:
with:
access_token: ${{ github.token }}
all_but_latest: true
# Checks out OneFlow at the requested ref, derives the staging wheel URL for
# that commit/platform, and runs the find-wheel action (per its step name it
# checks whether a wheel is available). Both results are exposed as job outputs.
find-oss-wheel:
  name: "Set env and Find wheel in oss"
  runs-on: ubuntu-latest
  outputs:
    find-wheel-hit: ${{ steps.find-wheel.outputs.find-wheel-hit }}
    ONEFLOW_WHEEL_PATH: ${{ steps.set-wheel-path.outputs.ONEFLOW_WHEEL_PATH }}
  steps:
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ github.event.inputs.of_branch_or_commit }}
        path: ${{ env.ONEFLOW_SRC }}
    - name: Get Oneflow Wheel Path
      id: set-wheel-path
      env:
        # Pass the workflow input through the environment instead of splicing
        # it into the script text, so its value is never parsed as shell code.
        COMPUTE_PLATFORM: ${{ github.event.inputs.compute_platform }}
      run: |
        set -x
        cd ${{ env.ONEFLOW_SRC }}
        oneflow_commit="$(git rev-parse HEAD)"
        # Exported for the later find-wheel step, which reads env.oneflow_commit.
        echo "oneflow_commit=${oneflow_commit}" >> "$GITHUB_ENV"
        oneflow_wheel_path="http://oneflow-staging.oss-cn-beijing.aliyuncs.com/commit/${oneflow_commit}/${COMPUTE_PLATFORM}"
        # The `::set-output` workflow command is deprecated; write to GITHUB_OUTPUT.
        echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> "$GITHUB_OUTPUT"
        set +x
    - name: Check if wheel available
      id: find-wheel
      uses: Oneflow-Inc/get-oneflow/find-wheel@ee5b8d83dfc4645d0e67ae603d31f78fd4b1c1a7
      with:
        ref: ${{ env.oneflow_commit }}
        entry: ${{ github.event.inputs.compute_platform }}
# Header of the build-cache lookup job; its steps continue past this span.
find-build-cache:
  # A second, earlier `name` key ("Find build cache") was dropped: duplicate
  # keys are invalid YAML and parsers keep only the last one anyway.
  name: "Find build wheel"
  needs: [find-oss-wheel]
  # NOTE(review): this job (and everything that needs it) runs only when
  # find-wheel-hit equals 1. If the intent is to build only when no wheel was
  # found, the comparison is inverted — confirm against the find-wheel action.
  if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }}
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.find-cache.outputs.matrix }}
......@@ -62,10 +93,9 @@ jobs:
oneflow-src: ${{ env.ONEFLOW_SRC }}
entries: |
${{ github.event.inputs.compute_platform }}
# Header of the OneFlow build job (the stale `build-manylinux` key that held
# only a `name` has been removed; the job id is `build-oneflow`).
build-oneflow:
  name: "Build OneFlow ${{ github.event.inputs.compute_platform }}"
  runs-on: ['self-hosted', 'linux', 'provision']
  # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v']
  # find-oss-wheel must be a direct dependency: the pip-index step of this job
  # reads needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH, and the `needs`
  # context only exposes outputs of jobs listed here.
  needs: [find-oss-wheel, find-build-cache]
  strategy:
    fail-fast: true
......@@ -76,9 +106,11 @@ jobs:
WHEELHOUSE_DIR: manylinux-wheelhouse
OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
outputs:
ONEFLOW_WHEEL_PATH: ${{ steps.set-env.outputs.ONEFLOW_WHEEL_PATH }}
steps:
- name: Test only
run: |
set -x
echo '${{ matrix.entry }} == ${{ github.event.inputs.compute_platform }}'
- name: Fix permissions
run: |
set -x
......@@ -103,13 +135,11 @@ jobs:
# oneflow_branch=`git branch --show-current`
# oneflow_branch=`git symbolic-ref --short HEAD`
oneflow_commit=`git rev-parse HEAD`
oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}
# oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}
oss_branch_dir=branch/${oneflow_branch}/${{ matrix.entry }}
oss_dir=${oss_branch_dir}/${oneflow_commit}
echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV
echo "oss_dir=${oss_dir}" >> $GITHUB_ENV
oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}/${oneflow_commit}
# echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV
echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}"
cd $current_dir
set +x
- uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472
......@@ -121,15 +151,10 @@ jobs:
entry: ${{ matrix.entry }}
digest-type: build
mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }}
# - name: Check digest and fail if cache result not identical to matrix
# if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }}
# run: |
# echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
# exit 1
# Build step: runs the get-oneflow action for the matrix entry that matches
# the requested compute platform, unless the matrix marks it as a cache hit.
- uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472
  name: Build OneFlow ${{ matrix.entry }}
  id: build-cuda
  # `${{ }}` cannot be nested inside an expression; contexts are referenced
  # directly within the single enclosing expression.
  if: ${{ matrix.entry == github.event.inputs.compute_platform && !matrix.cache-hit }}
  with:
    cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake
    build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh
......@@ -161,94 +186,94 @@ jobs:
# Installs the OSS helper packages and generates the pip index pages once.
# NOTE(review): ONEFLOW_WHEEL_PATH is built elsewhere in this workflow as a
# full http:// URL, while the other --index_key values are bucket-relative
# keys — confirm create_pip_index.py accepts a URL here.
run: |
  python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
  python3 -m pip install oss2 beautifulsoup4 --user
  python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html --index_key=${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }}/index.html
# End-to-end benchmark job: starts a GPU test container, pip-installs OneFlow
# from the wheel location published by the build job, runs
# ci/test/resnet50_e2e.sh inside the container and uploads the `log` directory.
test:
  name: Test suite
  # needs: [find-test-cache]build-manylinux
  # The build job's id is `build-oneflow`; `needs` and the outputs lookup in
  # the "Install OneFlow" step must use that id to resolve.
  needs: [build-oneflow]
  # runs-on: ['self-hosted', 'linux', 'provision']
  runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v']
  env:
    # NOTE(review): no strategy.matrix is active for this job (it is commented
    # out below), so matrix.entry presumably renders as empty — confirm.
    TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test"
    TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae
  # strategy:
  #   fail-fast: true
  #   max-parallel: 5
  #   matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }}
  steps:
    - name: Fix permissions
      # if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        set -x
        docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) .
    - name: Checkout Oneflow-Inc/OneFlow-Benchmark
      uses: actions/checkout@v2
    - name: Remove container
      timeout-minutes: 45
      # if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
    - name: Enable Pytorch container
      run: |
        echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV
    # - name: Set environment variables
    #   # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
    #   run: |
    #     set -x
    #     echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV
    #     echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV
    # NOTE(review): env.ONEFLOW_WHEEL_PATH is not set anywhere in the visible
    # part of this workflow (the step that exported it is commented out above),
    # so the -e ONEFLOW_WHEEL_PATH flag below may receive an empty value — confirm.
    - name: Start container
      # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
      # working-directory: ${{ env.ONEFLOW_SRC }}
      # env:
      #   ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin
      run: |
        docker pull ${{ env.TEST_IMG_TAG }}
        docker run -d --rm --privileged --network host --shm-size=8g \
          --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
          --runtime=nvidia \
          -v /DATA/disk1:/dataset:ro \
          -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \
          -v $PWD:$PWD \
          -w $PWD \
          -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \
          -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \
          -e E2E_NODE_NUM=1 \
          -e E2E_BATCH_SIZE=32 \
          -e E2E_LEARNING_RATE=1.536 \
          -e E2E_SRC_ROOT=Classification/cnns \
          -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \
          --name ${TEST_CONTAINER_NAME} \
          ${{ env.TEST_IMG_TAG }} \
          sleep 3600
        # -e ONEFLOW_CI=1 \
        # -v /model_zoo:/model_zoo:ro \
        # -v $HOME/test-container-cache/dot-local:/root/.local \
        # -v $HOME/test-container-cache/dot-cache:/root/.cache \
        # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \
        # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
        # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \
        # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \
        # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \
    - name: Install OneFlow
      # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }}
      run: |
        docker exec ${TEST_CONTAINER_NAME} python3 --version
        docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
        docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow
    - name: Test container
      # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
      run: |
        docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh
    - name: Upload log
      # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }}
      uses: ./.github/actions/upload_oss
      with:
        src_path: log
        oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log
        oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
        oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
        upload_core: false
# test:
# name: Test suite
# # needs: [find-test-cache]build-oneflow
# needs: [build-oneflow]
# # runs-on: ['self-hosted', 'linux', 'provision']
# runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v']
# env:
# TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test"
# TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae
# # strategy:
# # fail-fast: true
# # max-parallel: 5
# # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }}
# steps:
# - name: Fix permissions
# # if: ${{ contains(matrix.runs-on, 'self-hosted') }}
# run: |
# set -x
# docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) .
# - name: Checkout Oneflow-Inc/OneFlow-Benchmark
# uses: actions/checkout@v2
# - name: Remove container
# timeout-minutes: 45
# # if: ${{ contains(matrix.runs-on, 'self-hosted') }}
# run: |
# docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
# - name: Enable Pytorch container
# run: |
# echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV
# # - name: Set environment variables
# # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
# # run: |
# # set -x
# # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV
# # echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV
# - name: Start container
# # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
# # working-directory: ${{ env.ONEFLOW_SRC }}
# # env:
# # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin
# run: |
# docker pull ${{ env.TEST_IMG_TAG }}
# docker run -d --rm --privileged --network host --shm-size=8g \
# --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
# --runtime=nvidia \
# -v /DATA/disk1:/dataset:ro \
# -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \
# -v $PWD:$PWD \
# -w $PWD \
# -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \
# -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \
# -e E2E_NODE_NUM=1 \
# -e E2E_BATCH_SIZE=32 \
# -e E2E_LEARNING_RATE=1.536 \
# -e E2E_SRC_ROOT=Classification/cnns \
# -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \
# --name ${TEST_CONTAINER_NAME} \
# ${{ env.TEST_IMG_TAG }} \
# sleep 3600
# # -e ONEFLOW_CI=1 \
# # -v /model_zoo:/model_zoo:ro \
# # -v $HOME/test-container-cache/dot-local:/root/.local \
# # -v $HOME/test-container-cache/dot-cache:/root/.cache \
# # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \
# # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
# # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \
# # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \
# # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \
# - name: Install OneFlow
# # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }}
# run: |
# docker exec ${TEST_CONTAINER_NAME} python3 --version
# docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow
# - name: Test container
# # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
# run: |
# docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh
# - name: Upload log
# # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }}
# uses: ./.github/actions/upload_oss
# with:
# src_path: log
# oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log
# oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
# oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
# upload_core: false