提交 6cbb1730 编写于 作者: M mli

update ci, clean build/

上级 b70c2db4
# Git submodules vendored under build/: the Sphinx theme and the shared
# d2l build scripts (clean_build.sh, build_html.sh, ...).
[submodule "build/mx-theme"]
	path = build/mx-theme
	url = https://github.com/mli/mx-theme
[submodule "build/utils"]
	path = build/utils
	url = https://github.com/d2l-ai/utils
......@@ -2,14 +2,15 @@ stage("Build and Publish") {
node {
ws('workspace/d2l-zh') {
checkout scm
sh "build/build_all.sh"
sh """#!/bin/bash
set -e
if [[ ${env.BRANCH_NAME} == master ]]; then
build/upload.sh
fi
"""
}
sh "git submodule update --init"
sh "build/utils/clean_build.sh"
sh "conda env update -f build/env.yml"
sh "build/utils/build_html.sh zh"
sh "build/utils/build_pdf.sh zh"
sh "build/utils/build_pkg.sh zh"
if (env.BRANCH_NAME == 'master') {
sh "build/utils/publish_website.sh zh"
}
}
}
}
all: html

# NOTE(review): the two rule headers and the two `cd $(@D)` recipe lines
# below appear to be both sides of a diff (before/after md2ipynb.py moved
# into build/utils); only one of each should survive — confirm against the
# original commit before running this rule.
build/%.ipynb: %.md build/env.yml build/md2ipynb.py $(wildcard gluonbook/*)
build/%.ipynb: %.md build/env.yml $(wildcard gluonbook/*)
	@mkdir -p $(@D)
	cd $(@D); python ../md2ipynb.py ../../$< ../../$@
	cd $(@D); python ../utils/md2ipynb.py ../../$< ../../$@

# Copy markdown sources into build/ so notebooks are generated next to them.
build/%.md: %.md
	@mkdir -p $(@D)
......@@ -62,10 +62,9 @@ pdf: $(DEPS) $(OBJ) $(PDFIMG)
sed -i /\\\\sphinxtablecontinued{Continued\ on\ next\ page}/d $(TEX)
sed -i /{\\\\tablename\\\\\ \\\\thetable{}\ --\ continued\ from\ previous\ page}/d $(TEX)
cd build/_build/latex && \
bash ../../convert_output_svg.sh && \
bash ../../utils/convert_output_svg.sh && \
buf_size=10000000 xelatex d2l-zh.tex && \
buf_size=10000000 xelatex d2l-zh.tex
clean:
rm -rf build/chapter* build/_build build/img build/data build/environment.yml build/README.md $(PKG)
#!/bin/bash
# Full book build: prune stale build outputs, rebuild HTML and PDF, and
# package the notebooks, reporting the total wall-clock time at the end.
set -ex

tik=$(date +%s)

# Use plain `rm -rf` instead of `[ -e X ] && rm -rf X`: under `set -e` the
# guarded form exits the whole script with status 1 when X does not exist,
# because the AND-list itself fails.
rm -rf build/data-bak

# Clean build/chapter*/*ipynb and build/chapter*/*md that are no longer needed.
cd build
for ch in chapter*; do
    if ! [ -e "../$ch" ]; then
        # Chapter was deleted from the sources; drop its build output.
        rm -rf $ch
    else
        shopt -s nullglob
        for f in $ch/*.md $ch/*.ipynb; do
            # Keep an output only if its source markdown still exists.
            base=$(basename $f)
            md=${base%%.*}.md
            if ! [ -e "../$ch/$md" ]; then
                rm $f
            fi
        done
    fi
done

# Clean images that are no longer needed.
shopt -s nullglob
for f in img/*.svg img/*.jpg img/*.png; do
    if ! [ -e "../$f" ]; then
        rm $f
    fi
done
cd ..

git submodule update --init

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
conda env update -f build/env.yml
# NOTE(review): `conda activate` requires conda's shell hook to be sourced
# in non-interactive shells — confirm the CI image initializes it.
conda activate d2l-zh-build
pip list

rm -rf build/_build/
make html
make pdf
cp build/_build/latex/d2l-zh.pdf build/_build/html/
# `rm -f` instead of a guarded rm: see the `set -e` note above.
rm -f build/_build/latex/d2l-zh.aux
rm -f build/_build/latex/d2l-zh.idx

# Avoid putting data downloaded by scripts into the notebook package:
# stash build/data aside while `make pkg` runs, then restore it so the
# datasets are not re-downloaded on the next build.
mv build/data build/data-bak
make pkg
rm -rf build/data
mv build/data-bak build/data

# For 1.0
cp build/_build/html/d2l-zh.zip build/_build/html/d2l-zh-1.0.zip

# Time it
tok=$(date +%s)
runtime=$((tok-tik))
convertsecs() {
    # Format a second count as HH:MM:SS. Plain assignments are used because
    # `((h=0))` returns exit status 1, which would abort this function's
    # command-substitution subshell under `set -e` whenever a field is zero.
    h=$(( $1 / 3600 ))
    m=$(( ($1 % 3600) / 60 ))
    s=$(( $1 % 60 ))
    printf "%02d:%02d:%02d\n" $h $m $s
}
echo $(convertsecs $runtime)
# Convert every SVG in the current directory (notebook plot output) into a
# PDF at 80% scale so xelatex can embed the figures.
set -x
set -e
for f in *.svg; do
    rsvg-convert -f pdf -z 0.80 -o ${f%.svg}.pdf $f
done
#!/bin/bash
# Build the downloadable markdown package: collect chapter sources, convert
# notebooks back to markdown, merge each chapter's sections into a single
# chNN.md, convert figures to PDF, and zip everything as mdd.zip.
MD="mdd"
CH="ch.md"

[ -e $MD ] && rm -rf $MD
mkdir $MD

# Collect files.
cp index.rst $MD/
cp -R img $MD/
for f in chapter*/*; do
    dir=$(dirname "$f")
    # Only markdown sources and notebooks go into the package.
    if [ "${f##*.}" = "md" ] || [ "${f##*.}" = "ipynb" ]; then
        mkdir -p $MD/$dir
        cp $f $MD/$f
    fi
done

# ipynb to md.
for f in $MD/chapter*/*ipynb; do
    base=$(basename $f)
    jupyter nbconvert --to markdown $f --output "${base%%.*}.md"
    rm $f
done

for f in $MD/chapter*/*md; do
    dir=$(dirname "$f")
    # Remove inner link.
    sed -i 's/\[\([^]]*\)\]([^\)]*.md)/\1/g' $f
    # Refer pdf instead of svg.
    sed -i s/\\.svg/.pdf/g $f
    # Refer img in the same level.
    sed -i 's/\](..\/img/\](img/g' $f
    # Demote headings one level except in the chapter index, so the chapter
    # title stays the only top-level heading after concatenation.
    if [ "$f" != "$dir/index.md" ]; then
        sed -i s/#\ /##\ /g $f
    fi
done

# Convert svg to pdf.
for f in $MD/img/*svg; do
    rsvg-convert -f pdf -z 0.80 -o "${f%%.*}.pdf" $f
    rm $f
done

# Concat sections in each chapter.
for f in $MD/chapter*/index.md; do
    # mdd_utils reads the index path from argv and prints the section list.
    sections=$(python -c 'import mdd_utils; print(mdd_utils.get_sections())' $f)
    dir=$(dirname "$f")
    chapter=$dir/$CH
    cat $f $sections > $chapter
    # Strip ```eval_rst blocks, which have no markdown equivalent.
    perl -i -0777 -pe 's/```eval_rst[^`]+```//ge' $chapter
done

chapters=$(python -c 'import mdd_utils; print(mdd_utils.get_chapters())' $MD/index.rst)
i=1
for chapter in $chapters; do
    # Move matplotlib plots outside.
    mv $MD/$chapter/*_files $MD/
    # Move ch.md to ../ch0x.md
    mv $MD/$chapter/$CH $MD/ch$(printf %02d $i).md
    rm -rf $MD/$chapter
    i=$((i + 1))
done

# Convert matplotlib-generated svg to pdf.
for f in $MD/*_files/*svg; do
    rsvg-convert -f pdf -z 0.80 -o "${f%%.*}.pdf" $f
    rm $f
done

rm $MD/toc.rst

# zip files.
[ -e "$MD.zip" ] && rm "$MD.zip"
zip -r "$MD.zip" $MD
[ -e $MD ] && rm -rf $MD
#!/bin/bash
# Lint every built notebook with nblint; findings accumulate in $OUT, with
# progress echoed to the console as well.
# Prerequisite: pip install nblint
OUT=outlint
[ -e $OUT ] && rm $OUT
for f in build/chapter*/*.ipynb; do
    echo '===' $f
    echo '===' $f >> $OUT
    # NOTE(review): the next two lines look like both sides of a diff (the
    # pyflakes variant vs the default pycodestyle linter) — confirm which
    # single invocation is intended.
    nblint --linter pyflakes $f >> $OUT
    nblint $f >> $OUT
done

# Filter out pycodestyle findings that are deliberately tolerated in
# notebook-style code:
# E302 expected 2 blank lines, found 1
# E305 expected 2 blank lines after class or function definition, found 1
# E402 module level import not at top of file
# E703 statement ends with a semicolon
# E741 ambiguous variable name
IGNORE=( 'E302'
         'E305'
         'E402'
         'E703'
         'E741' )
for ign in "${IGNORE[@]}"; do
    sed -i /$ign/d $OUT
done
# md2ipynb.py: convert one markdown source file into an executed Jupyter
# notebook. Usage: python md2ipynb.py input.md output.ipynb
import sys
import os
import time
import notedown
import nbformat

# Fail fast unless exactly the input and output paths are given.
assert len(sys.argv) == 3, 'usage: input.md output.ipynb'
def is_ascii(character):
    """Return True if `character` is an ASCII character.

    ASCII spans code points 0-127. The original comparison used
    ``<= 128``, an off-by-one that also classified U+0080 (a C1 control
    character) as ASCII.
    """
    return ord(character) <= 127
def add_space_between_ascii_and_non_ascii(string):
    """Return `string` with a space inserted at each boundary between an
    ASCII and a non-ASCII character.

    No space is added next to punctuation (Chinese or ASCII), nor inside a
    markdown figure caption ``![...]``, where extra spaces would change the
    rendered alt text.
    """
    punc = {' ', '\n', '\t', '\r', ',', '。', '?', '!', '、',
            ';', ':', '“', '”', '(', ')', '【', '】', '—',
            '…', '《', '》', '`', '(', ')', '[', ']', ',', '.',
            '?', '!', ';', ':', '\'', '"'}
    if not string:
        return ''
    pieces = []
    inside_caption = False  # between '![' and its matching ']'
    open_brackets = 0       # depth of ordinary (non-caption) '[' brackets
    for pos in range(len(string) - 1):
        ch, nxt = string[pos], string[pos + 1]
        if ch == '[':
            # '![' opens a figure caption; a bare '[' is a regular bracket.
            if pos > 0 and string[pos - 1] == '!':
                inside_caption = True
            else:
                open_brackets += 1
        elif ch == ']':
            # ']' closes an ordinary bracket first; only an unmatched one
            # ends a figure caption.
            if open_brackets > 0:
                open_brackets -= 1
            else:
                inside_caption = False
        pieces.append(ch)
        crosses_boundary = is_ascii(ch) != is_ascii(nxt)
        if (crosses_boundary and ch not in punc and nxt not in punc
                and not inside_caption):
            pieces.append(' ')
    pieces.append(string[-1])
    return ''.join(pieces)
# Timeout for evaluating each notebook, in seconds.
timeout = 20 * 60
# Files matching any of these substrings are converted but not executed.
ignore_execution = []

input_fn = sys.argv[1]
output_fn = sys.argv[2]

# 'strict' match: only fenced blocks with explicit language markers become
# code cells.
reader = notedown.MarkdownReader(match='strict')
# Setting EVAL=0 in the environment skips notebook execution.
do_eval = int(os.environ.get('EVAL', True))

# read
with open(input_fn, 'r') as f:
    notebook = reader.read(f)

# Normalize spacing between Chinese text and inline ASCII words.
for c in notebook.cells:
    c.source = add_space_between_ascii_and_non_ascii(c.source)

if do_eval and not any([i in input_fn for i in ignore_execution]):
    tic = time.time()
    notedown.run(notebook, timeout)
    print('=== Finished evaluation in %f sec'%(time.time()-tic))

# write
# Language info is needed for syntax highlighting in rendered notebooks.
notebook['metadata'].update({'language_info':{'name':'python'}})
with open(output_fn, 'w') as f:
    f.write(nbformat.writes(notebook))
import os
import sys
def get_sections():
    """Return the section .md paths listed in an index file's toctree.

    The index markdown path is read from sys.argv[1]. Section entries are
    the lines between the ':maxdepth:' option and the closing '```' fence;
    each is joined with the index file's directory and given a .md suffix.
    The paths are returned as one space-separated string.
    """
    assert len(sys.argv) == 2
    index_md = sys.argv[1]
    base_dir = os.path.dirname(index_md)
    in_toc = False
    found = []
    with open(index_md) as handle:
        for raw in handle:
            entry = raw.strip()
            if ':maxdepth:' in entry:
                # Entries start after the toctree options.
                in_toc = True
            elif entry == '```':
                break
            elif in_toc and len(entry) > 1:
                found.append(os.path.join(base_dir, entry + '.md'))
    return ' '.join(found)
def get_chapters():
    """Return the chapter directory names from an index file's toctree.

    The index path is read from sys.argv[1]. Each toctree entry such as
    'chapter_x/index' contributes its leading path component 'chapter_x'.
    The names are returned as one space-separated string.
    """
    assert len(sys.argv) == 2
    index_md = sys.argv[1]
    in_toc = False
    names = []
    with open(index_md) as handle:
        for raw in handle:
            entry = raw.strip()
            if ':maxdepth:' in entry:
                # Entries start after the toctree options.
                in_toc = True
            elif entry == '```':
                break
            elif in_toc and len(entry) > 1:
                names.append(entry.split('/')[0])
    return ' '.join(names)
#!/bin/bash
# Round-trip the book's markdown through xliff translation files.
# Usage: <script> pre|extract|reconstruct|post — the first argument names
# the function to run (dispatched via "$@" at the bottom).

pre() {
    # Escape constructs that md2xliff would otherwise mangle.
    echo "Pre-processing markdown files in source lauguage.";
    for f in chapter*/*.md; do
        echo $f
        # Join the words of "{.python .input}" cell markers so they survive
        # extraction as single tokens.
        sed -i s/\.python\ \.input/\.python-\.input/g $f
        sed -i s/\.input\ \ n=/\.input-n=/g $f
        # Placeholder-encode '<' and '&', which break the xliff XML.
        sed -i s/\</%%%less-than%%%/g $f
        sed -i s/\&/%%%ampersand%%%/g $f
    done
}

extract() {
    echo "Convert markdown files into xliff (in source language) and skeleton files.";
    BSL="bookSrcLang"
    [ -e $BSL ] && rm -rf $BSL
    mkdir -p $BSL
    for f in chapter*/*.md; do
        echo $f
        xlf="${f%%.*}.xlf"
        sklmd="${f%%.*}.skl.md"
        ./md2xliff/bin/extract $f $xlf $sklmd 'zh-CN' 'en-US'
        # Generate bookSrcLang that contains only xlf files.
        dir=$(dirname "$f")
        mkdir -p $BSL/$dir
        base=$(basename $f)
        xlf_base="${base%%.*}.xlf"
        cp $xlf $BSL/$dir/$xlf_base
    done
}

reconstruct() {
    echo "Convert xliff (in target language) and skeleton files into markdown files.";
    BTL="bookTgtLang"
    for f in chapter*/*.xlf; do
        echo $f
        # Load xlf files from translated dir.
        cp $BTL/$f $f
        md="${f%%.*}.md"
        sklmd="${f%%.*}.skl.md"
        ./md2xliff/bin/xliff-reconstruct $f $sklmd $md
        #rm $f
        #rm $sklmd
    done
}

post() {
    # Undo the `pre` escapes on the translated markdown.
    echo "Post-processing markdown files in target language.";
    for f in chapter*/*.md; do
        echo $f
        sed -i s/\.python-\.input/\.python\ \.input/g $f
        sed -i s/\.input-n=/\.input\ \ n=/g $f
        sed -i s/%%%less-than%%%/\</g $f
        sed -i s/%%%ampersand%%%/\\\&/g $f
    done
}

# Dispatch: run the function named by the command-line arguments.
"$@"
#!/bin/bash
# Publish the built HTML tree to the public S3 buckets serving zh.d2l.ai
# and zh.diveintodeeplearning.org.
set -e

# NOTE(review): `conda activate` requires conda's shell hook to be sourced
# in non-interactive shells — confirm the CI image initializes it.
conda activate d2l-zh-build

# BUCKET=s3://zh.diveintodeeplearning.org
# BUCKET=s3://diveintodeeplearning-staging
DIR=build/_build/html/

# --delete removes remote objects that no longer exist locally.
aws s3 sync --delete $DIR s3://zh.d2l.ai --acl 'public-read' --quiet
aws s3 sync --delete $DIR s3://zh.diveintodeeplearning.org --acl 'public-read' --quiet

# Disabled variant that pre-gzipped css/js and set cache headers:
#find $DIR \( -iname '*.css' -o -iname '*.js' \) -exec gzip -9 -n {} \; -exec mv {}.gz {} \;
#aws s3 sync --exclude '*.*' --include '*.css' \
#  --content-type 'text/css' \
#  --content-encoding 'gzip' \
#  --acl 'public-read' \
#  $DIR $BUCKET
#aws s3 sync --exclude '*.*' --include '*.woff' --include '*.woff2' \
#  --expires "$(date -d '+24 months' --utc +'%Y-%m-%dT%H:%M:%SZ')" \
#  --acl 'public-read' --quiet \
#  $DIR $BUCKET
#aws s3 sync --exclude '*.*' --include '*.js' \
#  --content-type 'application/javascript' \
#  --content-encoding 'gzip' \
#  --acl 'public-read' \
#  $DIR $BUCKET
#aws s3 sync --delete $DIR $BUCKET --acl 'public-read' --quiet
Subproject commit 6071cc8794137727e2d4a582358b59ad507560cd
# Build Windows-flavoured notebooks: convert every chapter's markdown into
# executed .ipynb files under build/win_ipynb/.
from distutils.dir_util import copy_tree
import glob
import nbformat
import notedown
import os
from subprocess import check_output
import sys
import time

# To access data/imgs/gluonbook in upper level: run from build/ so relative
# paths like ../chapter_* resolve against the repository root.
os.chdir('build')
def mkdir_if_not_exist(path):
    """Create the directory ``os.path.join(*path)`` if it does not exist.

    `path` is a sequence of path components. Uses ``exist_ok=True`` so the
    original exists()/makedirs() check-then-act race (the directory being
    created by another process in between) cannot raise FileExistsError.
    """
    os.makedirs(os.path.join(*path), exist_ok=True)
# Timeout for executing each notebook, in seconds.
timeout = 60 * 60
# Files listed here are converted but not executed.
ignore_execution = ['chapter_computational-performance/async-computation.md']

# 'strict' match: only fenced blocks with explicit markers become code cells.
reader = notedown.MarkdownReader(match='strict')
# Setting EVAL=0 in the environment skips notebook execution.
do_eval = int(os.environ.get('EVAL', True))

# NOTE(review): indentation below reconstructed from a flattened source —
# confirm the nesting against the original file.
for chap in glob.glob(os.path.join('..', 'chapter_*')):
    # chap[3:] strips the leading '../' so outputs mirror the source layout
    # under win_ipynb/.
    mkdir_if_not_exist(['win_ipynb', chap[3:]])
    mds = filter(lambda x: x.endswith('md'), os.listdir(chap))
    for md in mds:
        if md != 'index.md':
            in_md = os.path.join(chap, md)
            # in_md[3:-2] drops '../' and the 'md' suffix ('.ipynb' replaces it).
            out_nb = os.path.join('win_ipynb', in_md[3:-2] + 'ipynb')
            # Skip notebooks already generated by a previous run.
            if not os.path.exists(out_nb):
                print('---', in_md[3:])
                # read
                with open(in_md, 'r', encoding="utf8") as f:
                    notebook = reader.read(f)
                if do_eval and chap[3:] + '/' + md not in ignore_execution:
                    tic = time.time()
                    notedown.run(notebook, timeout)
                    print('=== Finished evaluation in %f sec'%(time.time()-tic))
                # write
                # Language info is needed for syntax highlighting.
                notebook['metadata'].update({'language_info':{'name':'python'}})
                with open(out_nb, 'w', encoding="utf8") as f:
                    f.write(nbformat.writes(notebook))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册