diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 73e433a16a01a0d13997dff01d63b6e04162b44d..6d74a8567fcc57d4235f74aad14296193fa6326d 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -75,6 +75,21 @@ stages:
           run_end_to_end: false
           container: flink-build-container
           jdk: jdk8
+      - job: docs_404_check # run on a MSFT provided machine
+        pool:
+          vmImage: 'ubuntu-16.04'
+        steps:
+          - task: UseRubyVersion@0
+            inputs:
+              versionSpec: '= 2.4'
+              addToPath: true
+          - script: ./tools/ci/docs.sh
+          # upload spider.log for debugging; it is needed most when the link check above failed
+          - task: PublishPipelineArtifact@1
+            condition: succeededOrFailed()
+            inputs:
+              targetPath: ./docs/spider.log
+              artifact: spider.log
   # CI / Special stage for release, e.g. building PyFlink wheel packages, etc:
   - stage: ci_release
     displayName: "CI build (release)"
diff --git a/docs/check_links.sh b/docs/check_links.sh
index 5d9f7628c472cff5b190ae70784d1e0a40c4dacf..74e4b7f47e50dbd1daeb25213d90f0c1af181c01 100755
--- a/docs/check_links.sh
+++ b/docs/check_links.sh
@@ -17,10 +17,21 @@
 # limitations under the License.
 ################################################################################
 
+DOCS_CHECK_DIR="$(dirname "$0")" # relative
+DOCS_CHECK_DIR="$( cd "$DOCS_CHECK_DIR" && pwd -P )" # absolutized and normalized
+if [ -z "$DOCS_CHECK_DIR" ] ; then
+    # error; for some reason, the path is not accessible
+    # to the script (e.g. permissions re-evaled after suid)
+    echo "ERROR: cannot determine the directory containing $0" >&2
+    exit 1 # fail
+fi
+
+echo "Check docs directory: $DOCS_CHECK_DIR"
+
 target=${1:-"http://localhost:4000"}
 
 # Crawl the docs, ignoring robots.txt, storing nothing locally
-wget --spider -r -nd -nv -e robots=off -p -o spider.log "$target"
+wget --spider -r -nd -nv -e robots=off -p -o "$DOCS_CHECK_DIR/spider.log" "$target"
 
 # Abort for anything other than 0 and 4 ("Network failure")
 status=$?
@@ -29,11 +40,24 @@ if [ $status -ne 0 ] && [ $status -ne 4 ]; then
 fi
 
 # Fail the build if any broken links are found
-broken_links_str=$(grep -e 'Found [[:digit:]]\+ broken links' spider.log)
+broken_links_str=$(grep -e 'broken link' "$DOCS_CHECK_DIR/spider.log")
 if [ -n "$broken_links_str" ]; then
-    grep -B 1 "Remote file does not exist -- broken link!!!" spider.log
+    grep -B 1 "Remote file does not exist -- broken link!!!" "$DOCS_CHECK_DIR/spider.log"
     echo "---------------------------------------------------------------------------"
     echo -e "$broken_links_str"
     echo "Search for page containing broken link using 'grep -R BROKEN_PATH DOCS_DIR'"
     exit 1
 fi
+
+# Fail the build if any broken links are reported in Chinese (wget output is localized)
+broken_links_str=$(grep -e '死链接' "$DOCS_CHECK_DIR/spider.log")
+if [ -n "$broken_links_str" ]; then
+    grep -B 1 "远程文件不存在 -- 链接失效!!!" "$DOCS_CHECK_DIR/spider.log"
+    echo "---------------------------------------------------------------------------"
+    echo -e "$broken_links_str"
+    echo "Search for page containing broken link using 'grep -R BROKEN_PATH DOCS_DIR'"
+    exit 1
+fi
+
+echo 'All links in docs are valid!'
+exit 0
diff --git a/tools/azure-pipelines/build-apache-repo.yml b/tools/azure-pipelines/build-apache-repo.yml
index 5c471e9eb39c8934de7bf0a29258a276bfa431d9..f0767a0e6619de9aeccfee9885856373fe1a585b 100644
--- a/tools/azure-pipelines/build-apache-repo.yml
+++ b/tools/azure-pipelines/build-apache-repo.yml
@@ -68,6 +68,35 @@ stages:
           run_end_to_end: false
           container: flink-build-container
           jdk: jdk8
+      - job: docs_404_check # run on a MSFT provided machine
+        pool:
+          vmImage: 'ubuntu-16.04'
+        steps:
+          # Skip the docs check unless this is a pull request that contains a documentation change
+          - bash: |
+              source ./tools/azure-pipelines/build_properties.sh
+              # contains_docs_pullrequest returns 1 if the pull request touches docs, 0 otherwise
+              if contains_docs_pullrequest ; then
+                echo "##[debug]No documentation change detected. Skipping docs check."
+                echo "##vso[task.setvariable variable=skip;]1"
+              else
+                echo "##[debug]This pull request contains documentation changes. Running docs check."
+                echo "##vso[task.setvariable variable=skip;]0"
+              fi
+            displayName: Check if PR contains docs change
+          - task: UseRubyVersion@0
+            condition: and(succeeded(), not(eq(variables['SKIP'], '1')))
+            inputs:
+              versionSpec: '= 2.4'
+              addToPath: true
+          - script: ./tools/ci/docs.sh
+            condition: and(succeeded(), not(eq(variables['SKIP'], '1')))
+          # upload spider.log for debugging; it is needed most when the link check above failed
+          - task: PublishPipelineArtifact@1
+            condition: and(succeededOrFailed(), not(eq(variables['SKIP'], '1')))
+            inputs:
+              targetPath: ./docs/spider.log
+              artifact: spider.log
   # Special stage for nightly builds:
   - stage: cron_build
     displayName: "Cron build"
@@ -135,14 +164,14 @@ stages:
           container: flink-build-container
           jdk: jdk11
       - job: docs_404_check # run on a MSFT provided machine
-        pool: 
+        pool:
           vmImage: 'ubuntu-16.04'
         steps:
-        - task: UseRubyVersion@0
-          inputs:
-            versionSpec: '= 2.4'
-            addToPath: true
-        - script: ./tools/ci/docs.sh
+          - task: UseRubyVersion@0
+            inputs:
+              versionSpec: '= 2.4'
+              addToPath: true
+          - script: ./tools/ci/docs.sh
       - template: build-python-wheels.yml
         parameters:
           stage_name: cron_python_wheels
diff --git a/tools/azure-pipelines/build_properties.sh b/tools/azure-pipelines/build_properties.sh
index 34bdf4254f59cc671f270e2b37b27e5e262bd6ff..073dc1e0e2132a98637367513840ddc2eb77d62e 100755
--- a/tools/azure-pipelines/build_properties.sh
+++ b/tools/azure-pipelines/build_properties.sh
@@ -14,20 +14,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-#
-# Returns 0 if the change is a documentation-only pull request
-#
-function is_docs_only_pullrequest() {
+#
+# Determines the number of commits of the pull request under test.
+# The count is stored in the global GITHUB_NUM_COMMITS, which callers should
+# read instead of $?: an exit status is only 8 bits wide, so the returned
+# value is clamped to 255. A count of 0 means "not a pull request" or
+# "could not be determined".
+#
+function github_num_commits() {
+	GITHUB_NUM_COMMITS=0
 	# check if it is a pull request branch, as generated by ci-bot:
 	if [[ ! $BUILD_SOURCEBRANCHNAME == ci_* ]] ; then
 		echo "INFO: Not a pull request.";
-		return 1
+		return 0
 	fi
 	PR_ID=`echo "$BUILD_SOURCEBRANCHNAME" | cut -f2 -d_`
 	if ! [[ "$PR_ID" =~ ^[0-9]+$ ]] ; then
 		echo "ERROR: Extracted PR_ID is not a number, but this: '$PR_ID'"
-		return 1
+		return 0
 	fi
 	# check if it is docs only pull request
 	# 1. Get PR details
@@ -40,12 +44,31 @@ function is_docs_only_pullrequest() {
 	if [[ "$GITHUB_PULL_HEAD_SHA" != "$THIS_BRANCH_SHA" ]] ; then
 		echo "INFO: SHA mismatch: GITHUB_PULL_HEAD_SHA=$GITHUB_PULL_HEAD_SHA != THIS_BRANCH_SHA=$THIS_BRANCH_SHA";
 		# sha mismatch. There's some timing issue, and we can't trust the result
-		return 1
+		return 0
 	fi
 
 	# 3. Get number of commits in PR
 	GITHUB_NUM_COMMITS=`echo $GITHUB_PULL_DETAIL | jq -r ".commits"`
 
+	# jq prints "null" when the field is missing; never hand a non-number to 'return'
+	if ! [[ "$GITHUB_NUM_COMMITS" =~ ^[0-9]+$ ]] ; then
+		echo "ERROR: Extracted number of commits is not a number, but this: '$GITHUB_NUM_COMMITS'"
+		GITHUB_NUM_COMMITS=0
+		return 0
+	fi
+	# an exit status is only 8 bits wide: clamp instead of wrapping around (256 -> 0)
+	return $(( GITHUB_NUM_COMMITS > 255 ? 255 : GITHUB_NUM_COMMITS ))
+}
+
+#
+# Returns 0 if the change is a documentation-only pull request, 1 otherwise
+#
+function is_docs_only_pullrequest() {
+	github_num_commits
+	if [[ $GITHUB_NUM_COMMITS == 0 ]]; then
+		return 1
+	fi
+
 	if [[ $(git diff --name-only HEAD..HEAD~$GITHUB_NUM_COMMITS | grep -v "docs/") == "" ]] ; then
 		echo "INFO: This is a docs only change. Changed files:"
 		git diff --name-only HEAD..HEAD~$GITHUB_NUM_COMMITS
@@ -53,3 +76,23 @@ function is_docs_only_pullrequest() {
 	fi
 	return 1
 }
+
+#
+# Returns 1 if the pull request changes at least one path containing "docs/",
+# and 0 otherwise (also when this is not a pull request or the number of
+# commits is unknown). Note the inverted convention: a caller that wants to
+# run the docs check must act on the non-zero status.
+#
+function contains_docs_pullrequest() {
+	github_num_commits
+	if [[ $GITHUB_NUM_COMMITS == 0 ]]; then
+		return 0
+	fi
+
+	if [[ $(git diff --name-only HEAD..HEAD~"$GITHUB_NUM_COMMITS" | grep "docs/") != "" ]] ; then
+		echo "INFO: This is a change contains docs. Changed files:"
+		git diff --name-only HEAD..HEAD~"$GITHUB_NUM_COMMITS"
+		return 1
+	fi
+	return 0
+}