# single_page.py: builds the single-page HTML and PDF editions of the documentation.
import logging
import os
import re
import shutil
import subprocess
import yaml

import bs4
import mkdocs.commands.build

import test
import util
import website


def recursive_values(item):
    """Yield every string found in a nested structure of dicts and lists.

    Dict keys are ignored and only their values are descended into; lists are
    walked in order. Anything that is not a dict, a list or a str (numbers,
    None, tuples, ...) is skipped without error.

    Args:
        item: a str, or an arbitrarily nested combination of dicts and lists.

    Yields:
        Each str leaf, in depth-first order.
    """
    if isinstance(item, str):
        yield item
        return

    if isinstance(item, dict):
        # Only the values matter here, so there is no need to build key/value pairs.
        children = item.values()
    elif isinstance(item, list):
        children = item
    else:
        return

    for child in children:
        yield from recursive_values(child)


# Matches every character that may not appear in an anchor name
# (anything other than a word character or '-').
anchor_not_allowed_chars = re.compile(r'[^\w\-]')


def generate_anchor_from_path(path):
    """Return an anchor name derived from a file path.

    Every character of `path` that is neither a word character nor '-'
    (for example '/' and '.') is replaced with '-'.
    """
    return anchor_not_allowed_chars.sub('-', path)


def replace_link(match, path):
    """Rewrite one Markdown link so that it targets an anchor of the single page.

    Args:
        match: regex match for a Markdown link. Group 1 is the link title
            *including* its square brackets, group 2 is the link target
            without any '#fragment' part.
        path: path of the Markdown file that contains the link; relative
            targets are resolved against its directory.

    Returns:
        The matched text unchanged for http:// and https:// URLs; otherwise
        the same title followed by '(#anchor)', where the anchor is built
        from the normalized target path by generate_anchor_from_path.
    """
    title = match.group(1)
    link = match.group(2)

    # Not a relative link. The scheme separator is checked explicitly so that
    # relative targets whose name merely begins with "http" (e.g. "http.md")
    # are still rewritten.
    if link.startswith(('http://', 'https://')):
        return match.group(0)

    # A directory-style link ("some/page/") refers to "some/page.md".
    if link.endswith('/'):
        link = link[:-1] + '.md'

    target = os.path.normpath(os.path.join(os.path.dirname(path), link))

    # `title` already carries its own brackets, so none are added here;
    # wrapping it again would produce "[[title]](#anchor)".
    return '{}(#{})'.format(title, generate_anchor_from_path(target))


# Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav):
    """Write all Markdown files referenced by the navigation into one stream.

    For every file the output gets a horizontal ruler and an `<a name=...>`
    anchor derived from the file's path, followed by the file's content with
    YAML metadata blocks removed, headers pushed one level down and
    links rewritten through replace_link.

    Args:
        lang: language code; files are read from `<docs_path>/<lang>/`.
        docs_path: root directory of the documentation sources.
        single_page_file: writable text file object receiving the result;
            it is flushed, but not closed, before returning.
        nav: nested dict/list navigation structure whose string leaves are
            file paths relative to the language directory. If
            `<docs_path>/toc_<lang>.yml` exists, its 'nav' entry is used
            instead.

    Raises:
        AssertionError: if the navigation contains no files.
    """
    lang_path = os.path.join(docs_path, lang)

    proj_config = f'{docs_path}/toc_{lang}.yml'
    if os.path.exists(proj_config):
        with open(proj_config, encoding='utf-8') as cfg_file:
            # NOTE(review): full_load accepts more than plain data. Presumably fine
            # for a toc file that lives in the docs tree; prefer yaml.safe_load
            # if this file could ever come from an untrusted source.
            nav = yaml.full_load(cfg_file.read())['nav']

    files_to_concatenate = list(recursive_values(nav))
    files_count = len(files_to_concatenate)
    logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
    logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
    assert files_count > 0, f'Empty single-page for {lang}'

    # Group 1: "[title]" with brackets, group 2: target without "#fragment".
    link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')

    for path in files_to_concatenate:
        try:
            # Read explicitly as UTF-8 so the result does not depend on the
            # locale of the machine running the build.
            with open(os.path.join(lang_path, path), encoding='utf-8') as f:
                # Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
                single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))

                in_metadata = False
                for line in f:
                    # Skip YAML metadata: every '---' line toggles the state
                    # and is itself dropped.
                    if line == '---\n':
                        in_metadata = not in_metadata
                        continue

                    if in_metadata:
                        continue

                    # Increase the level of headers.
                    # NOTE(review): this applies to any line starting with '#',
                    # which also covers '#' comments inside code blocks.
                    if line.startswith('#'):
                        line = '#' + line

                    # Replace links within the docs (single pass; `replaced`
                    # is the number of links found on the line).
                    line, replaced = link_regexp.subn(
                        lambda match, path=path: replace_link(match, path),
                        line)

                    # If failed to replace the relative link, print to log
                    if replaced and '../' in line:
                        logging.info('Failed to resolve relative link:')
                        logging.info(path)
                        logging.info(line)

                    single_page_file.write(line)

        except IOError as e:
            # A file that cannot be read is reported and skipped.
            logging.warning(str(e))

    single_page_file.flush()


def build_single_page_version(lang, args, nav, cfg):
    """Build the single-page edition of the docs for one language.

    Steps, in order:
      1. Concatenate all pages into `<docs_dir>/<lang>/single.md`.
      2. Copy the language directory to a temporary location, remove every
         other `.md` file there and point `cfg` at it.
      3. Unless `args.test_only` is set, build the site, copy the result to
         the output directory and split `index.html` on the
         `<!-- BREAK -->` markers into `index.html` and `content.js`.
      4. Build the site again with `single_page` disabled, assemble
         `base.css` / `base.js`, optionally save the raw result, and run
         `test.test_single_page` on the generated page.
      5. Unless `args.skip_pdf` is set, rewrite asset URLs to `file://`
         paths and render a PDF with `wkhtmltopdf`.

    Args:
        lang: language code of the documentation to build.
        args: parsed command-line options; this function reads `docs_dir`,
            `docs_output_dir`, `test_only`, `minify`, `save_raw_single_page`
            and `skip_pdf`, and passes `args` on to the `website` helpers.
        nav: navigation structure forwarded to concatenate.
        cfg: mkdocs configuration object; it is modified in place
            (`extra`, `docs_dir`, `site_dir`, `nav`).

    Side effects:
        Sets the `SINGLE_PAGE` environment variable to '1'.
    """
    logging.info(f'Building single page version for {lang}')
    os.environ['SINGLE_PAGE'] = '1'
    extra = cfg.data['extra']
    extra['single_page'] = True
    extra['is_amp'] = False

    with open(os.path.join(args.docs_dir, lang, 'single.md'), 'w', encoding='utf-8') as single_md:
        concatenate(lang, args.docs_dir, single_md, nav)

        with util.temp_dir() as site_temp, util.temp_dir() as docs_temp:
            docs_src_lang = os.path.join(args.docs_dir, lang)
            docs_temp_lang = os.path.join(docs_temp, lang)
            shutil.copytree(docs_src_lang, docs_temp_lang)

            # Keep only single.md among the Markdown sources of the copy.
            for root, _, filenames in os.walk(docs_temp_lang):
                for filename in filenames:
                    if filename != 'single.md' and filename.endswith('.md'):
                        os.unlink(os.path.join(root, filename))

            cfg.load_dict({
                'docs_dir': docs_temp_lang,
                'site_dir': site_temp,
                'extra': extra,
                'nav': [
                    {cfg.data.get('site_name'): 'single.md'}
                ]
            })

            # Needed by the PDF step as well, so it must be defined even
            # when the publishing step below is skipped by test_only.
            single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')

            if not args.test_only:
                mkdocs.commands.build.build(cfg)

                if os.path.exists(single_page_output_path):
                    shutil.rmtree(single_page_output_path)

                shutil.copytree(
                    os.path.join(site_temp, 'single'),
                    single_page_output_path
                )

                single_page_index_html = os.path.join(single_page_output_path, 'index.html')
                single_page_content_js = os.path.join(single_page_output_path, 'content.js')

                # The page is expected to contain exactly two BREAK markers:
                # the part between them goes into content.js.
                with open(single_page_index_html, 'r', encoding='utf-8') as f:
                    sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')

                with open(single_page_index_html, 'w', encoding='utf-8') as f:
                    f.write(sp_prefix)
                    f.write(sp_suffix)

                with open(single_page_content_js, 'w', encoding='utf-8') as f:
                    if args.minify:
                        import jsmin
                        sp_js = jsmin.jsmin(sp_js)
                    f.write(sp_js)

            logging.info(f'Re-building single page for {lang} pdf/test')
            with util.temp_dir() as test_dir:
                extra['single_page'] = False
                cfg.load_dict({
                    'docs_dir': docs_temp_lang,
                    'site_dir': test_dir,
                    'extra': extra,
                    'nav': [
                        {cfg.data.get('site_name'): 'single.md'}
                    ]
                })
                mkdocs.commands.build.build(cfg)

                # The shell is used on purpose here: the entries returned by
                # the website helpers are joined into a `cat` command line
                # and redirected into the bundled asset files.
                css_in = ' '.join(website.get_css_in(args))
                js_in = ' '.join(website.get_js_in(args))
                subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
                subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)

                if args.save_raw_single_page:
                    shutil.copytree(test_dir, args.save_raw_single_page)

                logging.info(f'Running tests for {lang}')
                test.test_single_page(
                    os.path.join(test_dir, 'single', 'index.html'), lang)

                if not args.skip_pdf:
                    single_page_index_html = os.path.join(test_dir, 'single', 'index.html')
                    single_page_pdf = os.path.abspath(
                        os.path.join(single_page_output_path, f'clickhouse_{lang}.pdf')
                    )
                    # The output directory does not exist yet when the
                    # publishing step was skipped.
                    os.makedirs(os.path.dirname(single_page_pdf), exist_ok=True)

                    with open(single_page_index_html, 'r', encoding='utf-8') as f:
                        soup = bs4.BeautifulSoup(
                            f.read(),
                            features='html.parser'
                        )

                    # Point assets at the local build so they can be loaded
                    # without a web server; query strings are dropped.
                    # Tags lacking the attribute are left alone.
                    soup_prefix = f'file://{test_dir}'
                    for img in soup.findAll('img'):
                        img_src = img.get('src')
                        if img_src is not None and img_src.startswith('/'):
                            img['src'] = soup_prefix + img_src
                    for script in soup.findAll('script'):
                        script_src = script.get('src')
                        if script_src:
                            script['src'] = soup_prefix + script_src.split('?', 1)[0]
                    for link in soup.findAll('link'):
                        link_href = link.get('href')
                        if link_href is not None:
                            link['href'] = soup_prefix + link_href.split('?', 1)[0]

                    with open(single_page_index_html, 'w', encoding='utf-8') as f:
                        f.write(str(soup))

                    create_pdf_command = [
                        'wkhtmltopdf',
                        '--print-media-type',
                        '--log-level', 'warn',
                        single_page_index_html, single_page_pdf
                    ]

                    logging.info(' '.join(create_pdf_command))
                    # Pass the argument list directly: no shell is involved, so
                    # paths containing spaces or shell metacharacters are safe.
                    subprocess.check_call(create_pdf_command)

        logging.info(f'Finished building single page version for {lang}')