提交 6cbb1730 编写于 作者: M mli

update ci, clean build/

上级 b70c2db4
# Git submodules vendored under build/: the Sphinx theme and the shared
# d2l build scripts (clean_build.sh, build_html.sh, ...).
[submodule "build/mx-theme"]
	path = build/mx-theme
	url = https://github.com/mli/mx-theme
[submodule "build/utils"]
	path = build/utils
	url = https://github.com/d2l-ai/utils
......@@ -2,14 +2,15 @@ stage("Build and Publish") {
node {
ws('workspace/d2l-zh') {
checkout scm
sh "build/build_all.sh"
sh """#!/bin/bash
set -e
if [[ ${env.BRANCH_NAME} == master ]]; then
build/upload.sh
fi
"""
}
sh "git submodule update --init"
sh "build/utils/clean_build.sh"
sh "conda env update -f build/env.yml"
sh "build/utils/build_html.sh zh"
sh "build/utils/build_pdf.sh zh"
sh "build/utils/build_pkg.sh zh"
if (env.BRANCH_NAME == 'master') {
sh "build/utils/publish_website.sh zh"
}
}
}
}
all: html

# NOTE(review): the two rule headers and the two `cd $(@D)` recipe lines
# below appear to be both sides of a diff (before/after md2ipynb.py moved
# into build/utils); only one of each should survive — confirm against the
# original commit before running this rule.
build/%.ipynb: %.md build/env.yml build/md2ipynb.py $(wildcard gluonbook/*)
build/%.ipynb: %.md build/env.yml $(wildcard gluonbook/*)
	@mkdir -p $(@D)
	cd $(@D); python ../md2ipynb.py ../../$< ../../$@
	cd $(@D); python ../utils/md2ipynb.py ../../$< ../../$@

# Copy markdown sources into build/ so notebooks are generated next to them.
build/%.md: %.md
	@mkdir -p $(@D)
......@@ -62,10 +62,9 @@ pdf: $(DEPS) $(OBJ) $(PDFIMG)
sed -i /\\\\sphinxtablecontinued{Continued\ on\ next\ page}/d $(TEX)
sed -i /{\\\\tablename\\\\\ \\\\thetable{}\ --\ continued\ from\ previous\ page}/d $(TEX)
cd build/_build/latex && \
bash ../../convert_output_svg.sh && \
bash ../../utils/convert_output_svg.sh && \
buf_size=10000000 xelatex d2l-zh.tex && \
buf_size=10000000 xelatex d2l-zh.tex
clean:
rm -rf build/chapter* build/_build build/img build/data build/environment.yml build/README.md $(PKG)
#!/bin/bash
# Full book build: prune stale build outputs, rebuild HTML and PDF, and
# package the notebooks, reporting the total wall-clock time at the end.
set -ex

tik=$(date +%s)

# Use plain `rm -rf` instead of `[ -e X ] && rm -rf X`: under `set -e` the
# guarded form exits the whole script with status 1 when X does not exist,
# because the AND-list itself fails.
rm -rf build/data-bak

# Clean build/chapter*/*ipynb and build/chapter*/*md that are no longer needed.
cd build
for ch in chapter*; do
    if ! [ -e "../$ch" ]; then
        # Chapter was deleted from the sources; drop its build output.
        rm -rf $ch
    else
        shopt -s nullglob
        for f in $ch/*.md $ch/*.ipynb; do
            # Keep an output only if its source markdown still exists.
            base=$(basename $f)
            md=${base%%.*}.md
            if ! [ -e "../$ch/$md" ]; then
                rm $f
            fi
        done
    fi
done

# Clean images that are no longer needed.
shopt -s nullglob
for f in img/*.svg img/*.jpg img/*.png; do
    if ! [ -e "../$f" ]; then
        rm $f
    fi
done
cd ..

git submodule update --init

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
conda env update -f build/env.yml
# NOTE(review): `conda activate` requires conda's shell hook to be sourced
# in non-interactive shells — confirm the CI image initializes it.
conda activate d2l-zh-build
pip list

rm -rf build/_build/
make html
make pdf
cp build/_build/latex/d2l-zh.pdf build/_build/html/
# `rm -f` instead of a guarded rm: see the `set -e` note above.
rm -f build/_build/latex/d2l-zh.aux
rm -f build/_build/latex/d2l-zh.idx

# Avoid putting data downloaded by scripts into the notebook package:
# stash build/data aside while `make pkg` runs, then restore it so the
# datasets are not re-downloaded on the next build.
mv build/data build/data-bak
make pkg
rm -rf build/data
mv build/data-bak build/data

# For 1.0
cp build/_build/html/d2l-zh.zip build/_build/html/d2l-zh-1.0.zip

# Time it
tok=$(date +%s)
runtime=$((tok-tik))
convertsecs() {
    # Format a second count as HH:MM:SS. Plain assignments are used because
    # `((h=0))` returns exit status 1, which would abort this function's
    # command-substitution subshell under `set -e` whenever a field is zero.
    h=$(( $1 / 3600 ))
    m=$(( ($1 % 3600) / 60 ))
    s=$(( $1 % 60 ))
    printf "%02d:%02d:%02d\n" $h $m $s
}
echo $(convertsecs $runtime)
# Convert every SVG in the current directory (notebook plot output) into a
# PDF at 80% scale so xelatex can embed the figures.
set -x
set -e
for f in *.svg; do
    rsvg-convert -f pdf -z 0.80 -o ${f%.svg}.pdf $f
done
#!/bin/bash
# Build the downloadable markdown package: collect chapter sources, convert
# notebooks back to markdown, merge each chapter's sections into a single
# chNN.md, convert figures to PDF, and zip everything as mdd.zip.
MD="mdd"
CH="ch.md"

[ -e $MD ] && rm -rf $MD
mkdir $MD

# Collect files.
cp index.rst $MD/
cp -R img $MD/
for f in chapter*/*; do
    dir=$(dirname "$f")
    # Only markdown sources and notebooks go into the package.
    if [ "${f##*.}" = "md" ] || [ "${f##*.}" = "ipynb" ]; then
        mkdir -p $MD/$dir
        cp $f $MD/$f
    fi
done

# ipynb to md.
for f in $MD/chapter*/*ipynb; do
    base=$(basename $f)
    jupyter nbconvert --to markdown $f --output "${base%%.*}.md"
    rm $f
done

for f in $MD/chapter*/*md; do
    dir=$(dirname "$f")
    # Remove inner link.
    sed -i 's/\[\([^]]*\)\]([^\)]*.md)/\1/g' $f
    # Refer pdf instead of svg.
    sed -i s/\\.svg/.pdf/g $f
    # Refer img in the same level.
    sed -i 's/\](..\/img/\](img/g' $f
    # Demote headings one level except in the chapter index, so the chapter
    # title stays the only top-level heading after concatenation.
    if [ "$f" != "$dir/index.md" ]; then
        sed -i s/#\ /##\ /g $f
    fi
done

# Convert svg to pdf.
for f in $MD/img/*svg; do
    rsvg-convert -f pdf -z 0.80 -o "${f%%.*}.pdf" $f
    rm $f
done

# Concat sections in each chapter.
for f in $MD/chapter*/index.md; do
    # mdd_utils reads the index path from argv and prints the section list.
    sections=$(python -c 'import mdd_utils; print(mdd_utils.get_sections())' $f)
    dir=$(dirname "$f")
    chapter=$dir/$CH
    cat $f $sections > $chapter
    # Strip ```eval_rst blocks, which have no markdown equivalent.
    perl -i -0777 -pe 's/```eval_rst[^`]+```//ge' $chapter
done

chapters=$(python -c 'import mdd_utils; print(mdd_utils.get_chapters())' $MD/index.rst)
i=1
for chapter in $chapters; do
    # Move matplotlib plots outside.
    mv $MD/$chapter/*_files $MD/
    # Move ch.md to ../ch0x.md
    mv $MD/$chapter/$CH $MD/ch$(printf %02d $i).md
    rm -rf $MD/$chapter
    i=$((i + 1))
done

# Convert matplotlib-generated svg to pdf.
for f in $MD/*_files/*svg; do
    rsvg-convert -f pdf -z 0.80 -o "${f%%.*}.pdf" $f
    rm $f
done

rm $MD/toc.rst

# zip files.
[ -e "$MD.zip" ] && rm "$MD.zip"
zip -r "$MD.zip" $MD
[ -e $MD ] && rm -rf $MD
#!/bin/bash
# Lint every built notebook with nblint; findings accumulate in $OUT, with
# progress echoed to the console as well.
# Prerequisite: pip install nblint
OUT=outlint
[ -e $OUT ] && rm $OUT
for f in build/chapter*/*.ipynb; do
    echo '===' $f
    echo '===' $f >> $OUT
    # NOTE(review): the next two lines look like both sides of a diff (the
    # pyflakes variant vs the default pycodestyle linter) — confirm which
    # single invocation is intended.
    nblint --linter pyflakes $f >> $OUT
    nblint $f >> $OUT
done

# Filter out pycodestyle findings that are deliberately tolerated in
# notebook-style code:
# E302 expected 2 blank lines, found 1
# E305 expected 2 blank lines after class or function definition, found 1
# E402 module level import not at top of file
# E703 statement ends with a semicolon
# E741 ambiguous variable name
IGNORE=( 'E302'
         'E305'
         'E402'
         'E703'
         'E741' )
for ign in "${IGNORE[@]}"; do
    sed -i /$ign/d $OUT
done
# md2ipynb.py: convert one markdown source file into an executed Jupyter
# notebook. Usage: python md2ipynb.py input.md output.ipynb
import sys
import os
import time
import notedown
import nbformat

# Fail fast unless exactly the input and output paths are given.
assert len(sys.argv) == 3, 'usage: input.md output.ipynb'
def is_ascii(character):
    """Return True if `character` is an ASCII character.

    ASCII spans code points 0-127. The original comparison used
    ``<= 128``, an off-by-one that also classified U+0080 (a C1 control
    character) as ASCII.
    """
    return ord(character) <= 127
def add_space_between_ascii_and_non_ascii(string):
    """Return `string` with a space inserted at each boundary between an
    ASCII and a non-ASCII character.

    No space is added next to punctuation (Chinese or ASCII), nor inside a
    markdown figure caption ``![...]``, where extra spaces would change the
    rendered alt text.
    """
    punc = {' ', '\n', '\t', '\r', ',', '。', '?', '!', '、',
            ';', ':', '“', '”', '(', ')', '【', '】', '—',
            '…', '《', '》', '`', '(', ')', '[', ']', ',', '.',
            '?', '!', ';', ':', '\'', '"'}
    if not string:
        return ''
    pieces = []
    inside_caption = False  # between '![' and its matching ']'
    open_brackets = 0       # depth of ordinary (non-caption) '[' brackets
    for pos in range(len(string) - 1):
        ch, nxt = string[pos], string[pos + 1]
        if ch == '[':
            # '![' opens a figure caption; a bare '[' is a regular bracket.
            if pos > 0 and string[pos - 1] == '!':
                inside_caption = True
            else:
                open_brackets += 1
        elif ch == ']':
            # ']' closes an ordinary bracket first; only an unmatched one
            # ends a figure caption.
            if open_brackets > 0:
                open_brackets -= 1
            else:
                inside_caption = False
        pieces.append(ch)
        crosses_boundary = is_ascii(ch) != is_ascii(nxt)
        if (crosses_boundary and ch not in punc and nxt not in punc
                and not inside_caption):
            pieces.append(' ')
    pieces.append(string[-1])
    return ''.join(pieces)
# Timeout for evaluating each notebook, in seconds.
timeout = 20 * 60
# Files matching any of these substrings are converted but not executed.
ignore_execution = []

input_fn = sys.argv[1]
output_fn = sys.argv[2]

# 'strict' match: only fenced blocks with explicit language markers become
# code cells.
reader = notedown.MarkdownReader(match='strict')
# Setting EVAL=0 in the environment skips notebook execution.
do_eval = int(os.environ.get('EVAL', True))

# read
with open(input_fn, 'r') as f:
    notebook = reader.read(f)

# Normalize spacing between Chinese text and inline ASCII words.
for c in notebook.cells:
    c.source = add_space_between_ascii_and_non_ascii(c.source)

if do_eval and not any([i in input_fn for i in ignore_execution]):
    tic = time.time()
    notedown.run(notebook, timeout)
    print('=== Finished evaluation in %f sec'%(time.time()-tic))

# write
# Language info is needed for syntax highlighting in rendered notebooks.
notebook['metadata'].update({'language_info':{'name':'python'}})
with open(output_fn, 'w') as f:
    f.write(nbformat.writes(notebook))
import os
import sys
def get_sections():
    """Return the section .md paths listed in an index file's toctree.

    The index markdown path is read from sys.argv[1]. Section entries are
    the lines between the ':maxdepth:' option and the closing '```' fence;
    each is joined with the index file's directory and given a .md suffix.
    The paths are returned as one space-separated string.
    """
    assert len(sys.argv) == 2
    index_md = sys.argv[1]
    base_dir = os.path.dirname(index_md)
    in_toc = False
    found = []
    with open(index_md) as handle:
        for raw in handle:
            entry = raw.strip()
            if ':maxdepth:' in entry:
                # Entries start after the toctree options.
                in_toc = True
            elif entry == '```':
                break
            elif in_toc and len(entry) > 1:
                found.append(os.path.join(base_dir, entry + '.md'))
    return ' '.join(found)
def get_chapters():
    """Return the chapter directory names from an index file's toctree.

    The index path is read from sys.argv[1]. Each toctree entry such as
    'chapter_x/index' contributes its leading path component 'chapter_x'.
    The names are returned as one space-separated string.
    """
    assert len(sys.argv) == 2
    index_md = sys.argv[1]
    in_toc = False
    names = []
    with open(index_md) as handle:
        for raw in handle:
            entry = raw.strip()
            if ':maxdepth:' in entry:
                # Entries start after the toctree options.
                in_toc = True
            elif entry == '```':
                break
            elif in_toc and len(entry) > 1:
                names.append(entry.split('/')[0])
    return ' '.join(names)
#!/bin/bash
# Round-trip the book's markdown through xliff translation files.
# Usage: <script> pre|extract|reconstruct|post — the first argument names
# the function to run (dispatched via "$@" at the bottom).

pre() {
    # Escape constructs that md2xliff would otherwise mangle.
    echo "Pre-processing markdown files in source lauguage.";
    for f in chapter*/*.md; do
        echo $f
        # Join the words of "{.python .input}" cell markers so they survive
        # extraction as single tokens.
        sed -i s/\.python\ \.input/\.python-\.input/g $f
        sed -i s/\.input\ \ n=/\.input-n=/g $f
        # Placeholder-encode '<' and '&', which break the xliff XML.
        sed -i s/\</%%%less-than%%%/g $f
        sed -i s/\&/%%%ampersand%%%/g $f
    done
}

extract() {
    echo "Convert markdown files into xliff (in source language) and skeleton files.";
    BSL="bookSrcLang"
    [ -e $BSL ] && rm -rf $BSL
    mkdir -p $BSL
    for f in chapter*/*.md; do
        echo $f
        xlf="${f%%.*}.xlf"
        sklmd="${f%%.*}.skl.md"
        ./md2xliff/bin/extract $f $xlf $sklmd 'zh-CN' 'en-US'
        # Generate bookSrcLang that contains only xlf files.
        dir=$(dirname "$f")
        mkdir -p $BSL/$dir
        base=$(basename $f)
        xlf_base="${base%%.*}.xlf"
        cp $xlf $BSL/$dir/$xlf_base
    done
}

reconstruct() {
    echo "Convert xliff (in target language) and skeleton files into markdown files.";
    BTL="bookTgtLang"
    for f in chapter*/*.xlf; do
        echo $f
        # Load xlf files from translated dir.
        cp $BTL/$f $f
        md="${f%%.*}.md"
        sklmd="${f%%.*}.skl.md"
        ./md2xliff/bin/xliff-reconstruct $f $sklmd $md
        #rm $f
        #rm $sklmd
    done
}

post() {
    # Undo the `pre` escapes on the translated markdown.
    echo "Post-processing markdown files in target language.";
    for f in chapter*/*.md; do
        echo $f
        sed -i s/\.python-\.input/\.python\ \.input/g $f
        sed -i s/\.input-n=/\.input\ \ n=/g $f
        sed -i s/%%%less-than%%%/\</g $f
        sed -i s/%%%ampersand%%%/\\\&/g $f
    done
}

# Dispatch: run the function named by the command-line arguments.
"$@"
#!/bin/bash
# Publish the built HTML tree to the public S3 buckets serving zh.d2l.ai
# and zh.diveintodeeplearning.org.
set -e

# NOTE(review): `conda activate` requires conda's shell hook to be sourced
# in non-interactive shells — confirm the CI image initializes it.
conda activate d2l-zh-build

# BUCKET=s3://zh.diveintodeeplearning.org
# BUCKET=s3://diveintodeeplearning-staging
DIR=build/_build/html/

# --delete removes remote objects that no longer exist locally.
aws s3 sync --delete $DIR s3://zh.d2l.ai --acl 'public-read' --quiet
aws s3 sync --delete $DIR s3://zh.diveintodeeplearning.org --acl 'public-read' --quiet

# Disabled variant that pre-gzipped css/js and set cache headers:
#find $DIR \( -iname '*.css' -o -iname '*.js' \) -exec gzip -9 -n {} \; -exec mv {}.gz {} \;
#aws s3 sync --exclude '*.*' --include '*.css' \
#  --content-type 'text/css' \
#  --content-encoding 'gzip' \
#  --acl 'public-read' \
#  $DIR $BUCKET
#aws s3 sync --exclude '*.*' --include '*.woff' --include '*.woff2' \
#  --expires "$(date -d '+24 months' --utc +'%Y-%m-%dT%H:%M:%SZ')" \
#  --acl 'public-read' --quiet \
#  $DIR $BUCKET
#aws s3 sync --exclude '*.*' --include '*.js' \
#  --content-type 'application/javascript' \
#  --content-encoding 'gzip' \
#  --acl 'public-read' \
#  $DIR $BUCKET
#aws s3 sync --delete $DIR $BUCKET --acl 'public-read' --quiet
Subproject commit 6071cc8794137727e2d4a582358b59ad507560cd
# Build Windows-flavoured notebooks: convert every chapter's markdown into
# executed .ipynb files under build/win_ipynb/.
from distutils.dir_util import copy_tree
import glob
import nbformat
import notedown
import os
from subprocess import check_output
import sys
import time

# To access data/imgs/gluonbook in upper level: run from build/ so relative
# paths like ../chapter_* resolve against the repository root.
os.chdir('build')
def mkdir_if_not_exist(path):
    """Create the directory ``os.path.join(*path)`` if it does not exist.

    `path` is a sequence of path components. Uses ``exist_ok=True`` so the
    original exists()/makedirs() check-then-act race (the directory being
    created by another process in between) cannot raise FileExistsError.
    """
    os.makedirs(os.path.join(*path), exist_ok=True)
# Timeout for executing each notebook, in seconds.
timeout = 60 * 60
# Files listed here are converted but not executed.
ignore_execution = ['chapter_computational-performance/async-computation.md']

# 'strict' match: only fenced blocks with explicit markers become code cells.
reader = notedown.MarkdownReader(match='strict')
# Setting EVAL=0 in the environment skips notebook execution.
do_eval = int(os.environ.get('EVAL', True))

# NOTE(review): indentation below reconstructed from a flattened source —
# confirm the nesting against the original file.
for chap in glob.glob(os.path.join('..', 'chapter_*')):
    # chap[3:] strips the leading '../' so outputs mirror the source layout
    # under win_ipynb/.
    mkdir_if_not_exist(['win_ipynb', chap[3:]])
    mds = filter(lambda x: x.endswith('md'), os.listdir(chap))
    for md in mds:
        if md != 'index.md':
            in_md = os.path.join(chap, md)
            # in_md[3:-2] drops '../' and the 'md' suffix ('.ipynb' replaces it).
            out_nb = os.path.join('win_ipynb', in_md[3:-2] + 'ipynb')
            # Skip notebooks already generated by a previous run.
            if not os.path.exists(out_nb):
                print('---', in_md[3:])
                # read
                with open(in_md, 'r', encoding="utf8") as f:
                    notebook = reader.read(f)
                if do_eval and chap[3:] + '/' + md not in ignore_execution:
                    tic = time.time()
                    notedown.run(notebook, timeout)
                    print('=== Finished evaluation in %f sec'%(time.time()-tic))
                # write
                # Language info is needed for syntax highlighting.
                notebook['metadata'].update({'language_info':{'name':'python'}})
                with open(out_nb, 'w', encoding="utf8") as f:
                    f.write(nbformat.writes(notebook))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册