...
 
Commits (5)
    https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/64939ea9d886d375923ad5721cbc10e96f0e8eb9 test 2021-10-28T09:50:54+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/8c1388071951f1ae3eea4206fe4eda6ca910db7e test 2021-10-28T10:08:14+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/f36513a9f359c48356efe79e479d43b1f11320c7 test 2021-10-28T11:07:26+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/3c533d1a0e03ff4f8171e2cb2611f9049f3230ae test 2021-10-28T12:41:32+08:00 ShawnXuan xiexuanx2@gmail.com https://gitcode.net/Oneflow-Inc/OneFlow-Benchmark/-/commit/4b7984b45b09bc5a551c7605b75862ece67b3663 test on KS 2021-10-28T14:34:43+08:00 ShawnXuan xiexuanx2@gmail.com
......@@ -147,76 +147,71 @@ jobs:
python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html --index_key=${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }}/index.html
# Job that runs the ResNet50 end-to-end test on a self-hosted GPU runner.
# NOTE(review): `name`, `needs` and `TEST_CONTAINER_NAME` each appear twice below. This looks like
# the removed and added sides of a diff hunk shown together; a real workflow file must keep only
# one of each, since duplicate mapping keys are invalid YAML.
test:
name: Test suite
needs: [build-oneflow]
name: Test ResNet50
needs: [build-oneflow, find-oss-wheel]
# always() lets this job run even when a job listed in `needs` failed or was skipped.
if: always()
runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v']
env:
# The container name embeds github.run_id so each workflow run gets its own name.
# NOTE(review): the first variant reads `matrix.entry`, but no `strategy.matrix` is visible in
# this section — confirm before keeping it.
TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test"
TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ github.event.inputs.compute_platform }}-test"
# Image reference for the test container, pinned to a fixed tag.
TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae
steps:
# Debug step: echoes the container name, the image tag and the ONEFLOW_WHEEL_PATH output of the
# build-oneflow job; it performs no other work.
- name: just a test
run: |
echo ${TEST_CONTAINER_NAME}
echo ${TEST_WITH_TORCH_IMG_TAG}
echo ${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }}
# - name: Fix permissions
# run: |
# set -x
# docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) .
# - name: Checkout Oneflow-Inc/OneFlow-Benchmark
# uses: actions/checkout@v2
# - name: Remove container
# timeout-minutes: 45
# run: |
# docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
# - name: Enable Pytorch container
# run: |
# echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV
# - name: Start container
# run: |
# docker pull ${{ env.TEST_IMG_TAG }}
# docker run -d --rm --privileged --network host --shm-size=8g \
# --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
# --runtime=nvidia \
# -v /DATA/disk1:/dataset:ro \
# -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \
# -v $PWD:$PWD \
# -w $PWD \
# -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \
# -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \
# -e E2E_NODE_NUM=1 \
# -e E2E_BATCH_SIZE=32 \
# -e E2E_LEARNING_RATE=1.536 \
# -e E2E_SRC_ROOT=Classification/cnns \
# -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \
# --name ${TEST_CONTAINER_NAME} \
# ${{ env.TEST_IMG_TAG }} \
# sleep 3600
# # -e ONEFLOW_CI=1 \
# # -v /model_zoo:/model_zoo:ro \
# # -v $HOME/test-container-cache/dot-local:/root/.local \
# # -v $HOME/test-container-cache/dot-cache:/root/.cache \
# # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \
# # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
# # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \
# # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \
# # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \
# - name: Install OneFlow
# run: |
# docker exec ${TEST_CONTAINER_NAME} python3 --version
# docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow
# - name: Test container
# run: |
# docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh
# - name: Upload log
# uses: ./.github/actions/upload_oss
# with:
# src_path: log
# oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log
# oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
# oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
# upload_core: false
# Recursively chown the workspace to the runner's uid/gid, using a throwaway busybox container
# that mounts $PWD at /p.
# NOTE(review): presumably needed because earlier container runs leave root-owned files behind — confirm.
- name: Fix permissions
run: |
set -x
docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) .
# Check out the repository with actions/checkout@v2; no `with:` inputs are given, so the action's
# defaults apply.
- name: Checkout Oneflow-Inc/OneFlow-Benchmark
uses: actions/checkout@v2
# Force-remove any container that already carries this run's container name.
# `|| true` keeps the step successful when no such container exists.
- name: Remove container
timeout-minutes: 45
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
# Publish TEST_IMG_TAG (set to TEST_WITH_TORCH_IMG_TAG) to the following steps by appending it to
# the $GITHUB_ENV file.
- name: Enable Pytorch container
run: |
echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV
# Pull the test image and start a detached, auto-removed (--rm) container that just runs
# `sleep 3600`; the later steps `docker exec` into it. The workspace is mounted at the same path,
# /DATA/disk1 is mounted read-only at /dataset, and the E2E_* settings are passed in as
# environment variables (presumably read by ci/test/resnet50_e2e.sh — confirm).
# NOTE(review): `env.ONEFLOW_WHEEL_PATH` is not defined in this job's visible `env:` block —
# confirm it is set elsewhere, otherwise the variable is passed in empty.
# NOTE(review): the container exits after 3600 s; confirm the test always finishes within that.
- name: Start container
run: |
docker pull ${{ env.TEST_IMG_TAG }}
docker run -d --rm --privileged --network host --shm-size=8g \
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
--runtime=nvidia \
-v /DATA/disk1:/dataset:ro \
-e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \
-v $PWD:$PWD \
-w $PWD \
-e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \
-e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \
-e E2E_NODE_NUM=1 \
-e E2E_BATCH_SIZE=32 \
-e E2E_LEARNING_RATE=1.536 \
-e E2E_SRC_ROOT=Classification/cnns \
-e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \
--name ${TEST_CONTAINER_NAME} \
${{ env.TEST_IMG_TAG }} \
sleep 3600
# -e ONEFLOW_CI=1 \
# -v /model_zoo:/model_zoo:ro \
# -v $HOME/test-container-cache/dot-local:/root/.local \
# -v $HOME/test-container-cache/dot-cache:/root/.cache \
# -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \
# -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
# -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \
# -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \
# -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \
# Inside the running test container: print the Python version, set pip's global index URL to the
# Tsinghua PyPI mirror, then install `oneflow` with --find-links pointing at the wheel path
# published as an output of the find-oss-wheel job.
- name: Install OneFlow
run: |
docker exec ${TEST_CONTAINER_NAME} python3 --version
docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=https://staging.oneflow.info/${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }} oneflow
# Execute ci/test/resnet50_e2e.sh with bash inside the test container.
- name: Run Test
run: |
docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh
# Upload the `log` directory to the OSS destination below via the repository-local upload_oss
# action; the OSS credentials are read from repository secrets.
# NOTE(review): `${GITHUB_SHA::7}` is shell substring syntax inside a `with:` input, which the
# workflow runner passes through literally — confirm the action expands it in a shell.
- name: Upload log
uses: ./.github/actions/upload_oss
with:
src_path: log
oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log
oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
upload_core: false
......@@ -31,7 +31,7 @@ python3 ${E2E_SRC_ROOT}/of_cnn_train_val.py \
--nccl_fusion_max_ops=24 \
--gpu_image_decoder=True \
--num_epoch=$E2E_NUM_EPOCHS \
--num_examples=1024 \
--num_examples=1281167 \
--model=${model} 2>&1 | tee ${LOGFILE}
echo "Writting log to ${LOGFILE}"