未验证 提交 0f163678 编写于 作者: S Shenghang Tsai 提交者: GitHub

Build master whl once a day (#3894)

* add cron

* update once a day

* fix cuda args

* port better error msg

* fix gh env

* fix cuda version arg

* change dist

* add flush

* allow CI run up to 20 hours when building release

* check in ensure img

* use matrix

* call ensure img

* fix yaml syntax

* use full sha because short not consistent

* turn off continue-on-error

* add matrix_extra_flags for cpu

* add matrix

* refactor matrix

* check env var

* add exist check

* fix condition

* fix exist path

* add more log

* Revert "add more log"

This reverts commit 6e6c494f1b9bfa3104ece69c6ce35d6328a2c231.

* Update version.py
Co-authored-by: Your Name <you@example.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
上级 9bf1b0fe
name: Release
on:
push:
branches:
- master
paths-ignore:
- '**.md'
- '.gitignore'
schedule:
# beijing: 2 am.
# utc: 6 pm.
- cron: '0 18 * * *'
workflow_dispatch:
inputs:
placeholder:
description: 'placeholder, no effect'
python_version:
description: 'python_version'
default: '3.5,3.6,3.7,3.8'
required: false
jobs:
staging_release:
name: Staging Release
runs-on: [self-hosted, linux]
timeout-minutes: 1200
runs-on: [self-hosted, linux, release]
strategy:
fail-fast: false
max-parallel: 5
matrix:
release_version: [
'cpu',
'cu100',
'cu101',
'cu102',
'cu110',
'cu111',
'cu100_xla',
'cu101_xla',
'cu102_xla',
'cu110_xla',
'cu111_xla'
]
steps:
- name: Fix permissions
run: |
docker run --rm -v $PWD:/p -w /p busybox chmod -R o+w .
- uses: actions/checkout@v2
- name: Build OneFlow
- name: Set environment variables
env:
matrix_release_version: ${{matrix.release_version }}
python_version: ${{ github.event.inputs.python_version }}
OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
run: |
export ONEFLOW_CI_TMP_DIR=$HOME/ci-tmp-rel-102
docker run --rm -v $ONEFLOW_CI_TMP_DIR/:$HOME/ci-tmp/ busybox rm -rf $HOME/ci-tmp/wheelhouse
export ONEFLOW_CI_PYTHON_VERSION_ARGS=" "
export ONEFLOW_CI_EXTRA_ONEFLOW_CMAKE_ARGS="-DWITH_XLA=OFF"
export ONEFLOW_CI_BUILD_WHEEL_BASH_ARGS=" "
export ONEFLOW_CI_PACKAGE_SUFFIX="_cu102"
if [ -d $ONEFLOW_CI_TMP_DIR/wheelhouse ]; then
echo "existing wheelhouse found"
set -x
if [ "$matrix_release_version" == "cpu" ]; then
cuda_version="10.2"
extra_flags="--cpu"
elif [ "$matrix_release_version" == "cu100" ]; then
cuda_version="10.0"
elif [ "$matrix_release_version" == "cu101" ]; then
cuda_version="10.1"
elif [ "$matrix_release_version" == "cu102" ]; then
cuda_version="10.2"
elif [ "$matrix_release_version" == "cu110" ]; then
cuda_version="11.0"
elif [ "$matrix_release_version" == "cu111" ]; then
cuda_version="11.1"
elif [ "$matrix_release_version" == "cu100_xla" ]; then
cuda_version="10.0"
elif [ "$matrix_release_version" == "cu101_xla" ]; then
cuda_version="10.1"
extra_flags="--xla"
elif [ "$matrix_release_version" == "cu102_xla" ]; then
cuda_version="10.2"
extra_flags="--xla"
elif [ "$matrix_release_version" == "cu110_xla" ]; then
cuda_version="11.0"
extra_flags="--xla"
elif [ "$matrix_release_version" == "cu111_xla" ]; then
cuda_version="11.1"
extra_flags="--xla"
else
echo "Release version: $matrix_release_version not supported"
exit 1
fi
bash ci/build/make.sh
git_hash=$(git rev-parse --short "$GITHUB_SHA")
if [ -z "$cuda_version" ]
then
echo "cuda_version empty"
exit 1
fi
python3 ci/build/ensure_img.py
python_version=${python_version:-"3.5,3.6,3.7,3.8"}
echo "cuda_version=${cuda_version}" >> $GITHUB_ENV
echo "python_version=${python_version}" >> $GITHUB_ENV
echo "extra_flags=${extra_flags}" >> $GITHUB_ENV
tmp_dir=$HOME/ci-tmp-rel
echo "ci_tmp_dir=${tmp_dir}" >> $GITHUB_ENV
echo "wheelhouse_dir=${tmp_dir}/wheelhouse" >> $GITHUB_ENV
git_branch=${GITHUB_REF##*/}
timestamp=$(date '+%Y.%m.%d-%H.%M.%S')
dir=${git_branch}/${timestamp}-${git_hash}
$HOME/ossutil64 cp --update -r $ONEFLOW_CI_TMP_DIR/wheelhouse oss://oneflow-static/staging/${dir}
oss_dir=${git_branch}/${GITHUB_SHA}
echo "oss_dir=${oss_dir}" >> $GITHUB_ENV
echo "git_branch=${git_branch}" >> $GITHUB_ENV
$(python3 tools/oss_file_exist.py --bucket oneflow-staging --path ${oss_dir}/oneflow_${matrix_release_version})
echo "is_built=${OSS_FILE_EXISTED}" >> $GITHUB_ENV
# Build the wheel unless the setup step exported is_built=1.
# The whole comparison must live inside the expression: writing
# `${{ env.is_built }} != '1'` renders to a non-empty string, which is always
# truthy, so the step would never be skipped.
- name: Build OneFlow
  if: ${{ env.is_built != '1' }}
  uses: ./.github/actions/whl
  with:
    # These are passed through as literal `${...}` text; presumably the local
    # action expands them in a shell from the values written to GITHUB_ENV
    # -- TODO confirm against .github/actions/whl.
    tmp_dir: ${ci_tmp_dir}
    cuda_version: ${cuda_version}
    python_version: ${python_version}
    extra_flags: ${extra_flags}
# Upload the freshly built wheelhouse unless is_built=1 was exported earlier.
# The comparison is kept inside `${{ }}`; with `!= '1'` outside the braces the
# condition becomes a plain non-empty string and always evaluates to true.
- name: Upload wheel
  if: ${{ env.is_built != '1' }}
  uses: ./.github/actions/upload_oss
  with:
    src_path: ${wheelhouse_dir}
    oss_dst_path: oss://oneflow-staging/${oss_dir}
    oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
    oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
# Regenerate the pip index unless is_built=1 was exported earlier.
# The comparison is kept inside `${{ }}`; with `!= '1'` outside the braces the
# condition becomes a plain non-empty string and always evaluates to true.
- name: Update pip index
  if: ${{ env.is_built != '1' }}
  env:
    OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
    OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
  # NOTE(review): no line in this step or in the visible GITHUB_ENV exports
  # defines `dir`, so `${dir}` in the oneflow-static command looks like the
  # pre-change line of the diff -- confirm which command is meant to remain.
  run: |
    python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
    python3 -m pip install oss2 beautifulsoup4 --user
    python3 tools/create_pip_index.py --dir_key staging/${dir} -b oneflow-static --index_key=staging/${git_branch}/pip.index.html
    python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${git_branch}/pip.index.html
- name: Update API docs
if: github.ref == 'refs/heads/master'
env:
......
......@@ -65,7 +65,7 @@ jobs:
uses: ./.github/actions/upload_oss
with:
src_path: ${wheelhouse_dir}
oss_dst_path: oss://oneflow-static/staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-cuda
oss_dst_path: oss://oneflow-staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-cuda
oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
- name: Build docker image for testing
......@@ -148,7 +148,7 @@ jobs:
uses: ./.github/actions/upload_oss
with:
src_path: ${wheelhouse_dir}
oss_dst_path: oss://oneflow-static/staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-xla
oss_dst_path: oss://oneflow-staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-xla
oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
- name: Build docker image for testing
......@@ -178,7 +178,7 @@ jobs:
uses: ./.github/actions/upload_oss
with:
src_path: ${wheelhouse_dir}
oss_dst_path: oss://oneflow-static/staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-cpu
oss_dst_path: oss://oneflow-staging/pr/${{ github.event.pull_request.number }}/$(date '+%Y.%m.%d-%H.%M.%S')-$(git rev-parse --short "$GITHUB_SHA")-cpu
oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }}
oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
- name: Build docker image for testing
......
import os
import argparse
from pathlib import Path
import re
import json
import subprocess
def check_and_download(tag, url):
    """Make sure the docker image ``tag`` is present locally.

    If ``docker image inspect`` succeeds for ``tag`` nothing is downloaded.
    Otherwise the archive at ``url`` is fetched into ``~/imgs`` with
    ``wget -c``, loaded with ``docker load`` and the image is additionally
    tagged as ``ofkeep:<archive name without its two extensions>``.

    Args:
        tag: Docker image reference expected to exist after loading.
        url: Location of the image archive (e.g. a ``.tar.gz`` file).

    Raises:
        subprocess.CalledProcessError: if wget, docker load or docker tag
            exits with a non-zero status.
    """
    img_dir = os.path.join(os.path.expanduser("~"), "imgs")
    # exist_ok avoids the check-then-create race of exists() + makedirs(),
    # which can raise FileExistsError when two invocations start together.
    os.makedirs(img_dir, exist_ok=True)

    inspect_status = subprocess.run(
        f"docker image inspect {tag}",
        shell=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode
    if inspect_status == 0:
        print("[OK]", tag)
        return

    dst = os.path.join(img_dir, os.path.basename(url))
    subprocess.check_call(f"wget -c {url} -O {dst}", shell=True)
    subprocess.check_call(f"docker load -i {dst}", shell=True)

    # Strip two suffixes so that "name.tar.gz" becomes "name".
    base = os.path.basename(dst)
    for _ in range(2):
        base = os.path.splitext(base)[0]
    keep_tag = f"ofkeep:{base}"
    subprocess.check_call(f"docker tag {tag} {keep_tag}", shell=True)
if __name__ == "__main__":
    # Command-line entry point: parse the (currently unconsulted)
    # --create_index switch, then make sure every base image listed below is
    # available locally.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--create_index", action="store_true", required=False, default=False
    )
    args = cli.parse_args()

    # (docker tag, archive url) pairs, in the order they are processed.
    required_images = (
        (
            "nvidia/cuda:10.0-cudnn7-devel-centos7",
            "https://oneflow-static.oss-cn-beijing.aliyuncs.com/img/nvidiacuda10.0-cudnn7-devel-centos7.tar.gz",
        ),
        (
            "nvidia/cuda:10.1-cudnn7-devel-centos7",
            "https://oneflow-static.oss-cn-beijing.aliyuncs.com/img/nvidiacuda10.1-cudnn7-devel-centos7.tar.gz",
        ),
        (
            "nvidia/cuda:10.2-cudnn7-devel-centos7",
            "https://oneflow-static.oss-cn-beijing.aliyuncs.com/img/nvidiacuda10.2-cudnn7-devel-centos7.tar.gz",
        ),
        (
            "nvidia/cuda:11.0-cudnn8-devel-centos7",
            "https://oneflow-static.oss-cn-beijing.aliyuncs.com/img/nvidiacuda11.0-cudnn8-devel-centos7.tar.gz",
        ),
        (
            "nvidia/cuda:11.1-cudnn8-devel-centos7",
            "https://oneflow-static.oss-cn-beijing.aliyuncs.com/img/nvidiacuda11.1-cudnn8-devel-centos7.tar.gz",
        ),
    )
    for image_tag, archive_url in required_images:
        check_and_download(image_tag, archive_url)
......@@ -44,7 +44,13 @@ def run_cmds(cmds, gpu_num=0, timeout=10, chunk=1, verbose=False):
cmd = cmds.pop()
cuda_visible_devices = ",".join([str(i) for i in gpu_ids_to_occupy])
if verbose:
print("cuda_visible_devices:", cuda_visible_devices, "cmd:", cmd)
print(
"cuda_visible_devices:",
cuda_visible_devices,
"cmd:",
cmd,
flush=True,
)
proc = subprocess.Popen(
cmd,
env=dict(
......
......@@ -6,6 +6,7 @@ from pathlib import Path
def build_arg_env(env_var_name):
    """Format an environment variable as a ``--build-arg NAME=VALUE`` option.

    Args:
        env_var_name: Name of the environment variable to forward.

    Returns:
        The string ``--build-arg <env_var_name>=<value>``.

    Raises:
        AssertionError: if the variable is unset or empty.
    """
    value = os.environ.get(env_var_name)
    assert value, f"system environment variable {env_var_name} found empty"
    return "--build-arg {}={}".format(env_var_name, value)
......@@ -251,7 +252,7 @@ if __name__ == "__main__":
img_tag = f"oneflow:manylinux2014-cuda{cuda_version}"
if skip_img == False:
build_img(
args.cuda_version,
cuda_version,
args.oneflow_src_dir,
args.use_tuna,
args.use_system_proxy,
......@@ -320,6 +321,9 @@ gcc --version
except subprocess.CalledProcessError as e:
print("failed: ", e.cmd, e.args)
print("clean: ", cache_dir)
assert cache_dir != None
force_rm_dir(cache_dir)
build()
if cache_dir:
print("start retrying...")
force_rm_dir(cache_dir)
build()
else:
raise ValueError("something went wrong, please look at error above")
......@@ -13,4 +13,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
__version__ = "0.3b1"
__version__ = "0.3.0"
import os
import oss2
def check_existence(endpoint, bucket, path):
    """Report whether anything is stored under ``path`` in an OSS bucket.

    Credentials are read from the ``OSS_ACCESS_KEY_ID`` and
    ``OSS_ACCESS_KEY_SECRET`` environment variables. When ``path`` exists as
    an object, or the listing for ``path`` returns at least one entry, the
    line ``export OSS_FILE_EXISTED=1`` is printed to stdout; otherwise
    nothing is printed.

    Args:
        endpoint: OSS endpoint host name.
        bucket: Name of the bucket to query.
        path: Object key (or key prefix) to look for.

    Returns:
        True if the path was found, False otherwise.

    Raises:
        RuntimeError: if either credential environment variable is unset or
            empty.
    """
    ki = os.getenv("OSS_ACCESS_KEY_ID")
    ks = os.getenv("OSS_ACCESS_KEY_SECRET")
    # Fail with a readable message instead of an opaque error from the SDK
    # when the workflow forgot to pass the secrets through.
    for var_name, var_value in (
        ("OSS_ACCESS_KEY_ID", ki),
        ("OSS_ACCESS_KEY_SECRET", ks),
    ):
        if not var_value:
            raise RuntimeError(
                f"system environment variable {var_name} found empty"
            )

    auth = oss2.Auth(ki, ks)
    bucket_obj = oss2.Bucket(auth, endpoint, bucket)
    # NOTE(review): list_objects appears to treat `path` as a key prefix, so
    # a path ending in "oneflow_cu100" would presumably also match keys that
    # start with "oneflow_cu100_xla" -- confirm against the uploaded names.
    listing = bucket_obj.list_objects(path)
    listed_cnt = len(listing.object_list)
    is_existed = bool(bucket_obj.object_exists(path) or listed_cnt > 0)
    if is_existed:
        print("export OSS_FILE_EXISTED=1")
    return is_existed
if __name__ == "__main__":
    # Command-line entry point: look up --path in --bucket on the given
    # endpoint and let check_existence print the result.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-e",
        "--endpoint",
        type=str,
        required=False,
        default="oss-cn-beijing.aliyuncs.com",
    )
    for required_flag in ("--bucket", "--path"):
        cli.add_argument(required_flag, type=str, required=True)
    args = cli.parse_args()
    check_existence(args.endpoint, args.bucket, args.path)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册