# single_page.py
import logging
import os
import re
import shutil
import subprocess
import yaml

import bs4
import mkdocs.commands.build

import test
import util
import website


def recursive_values(item):
    """Yield every string found in a nested structure of dicts and lists.

    Dict values and list elements are traversed depth-first in iteration
    order. Dict keys are never yielded, and leaves that are not strings
    (numbers, ``None``, ...) are silently skipped.
    """
    if isinstance(item, dict):
        # Only the values are of interest; keys are ignored.
        for value in item.values():
            yield from recursive_values(value)
    elif isinstance(item, list):
        for value in item:
            yield from recursive_values(value)
    elif isinstance(item, str):
        yield item


# Anchors may contain only word characters and '-'; anything else is replaced.
anchor_not_allowed_chars = re.compile(r'[^\w\-]')


def generate_anchor_from_path(path):
    """Return an anchor name for ``path``.

    Every character that is neither a word character nor a hyphen
    (for example ``/`` and ``.``) is replaced with ``-``.
    """
    return anchor_not_allowed_chars.sub('-', path)


def replace_link(match, path):
    """Build the in-page anchor link that replaces a matched relative link.

    ``match`` is a regex match whose first group holds the link target, and
    ``path`` is the path of the file containing the link. A target ending in
    ``/`` is treated as the corresponding ``.md`` file. The target is resolved
    against the directory of ``path`` and converted to an anchor name; the
    result is returned as ``(#anchor)``.
    """
    target = match.group(1)
    if target.endswith('/'):
        # Directory-style link: drop the trailing slash and point at the page.
        target = target[:-1] + '.md'

    containing_dir = os.path.dirname(path)
    resolved = os.path.normpath(os.path.join(containing_dir, target))
    anchor = generate_anchor_from_path(resolved)
    return '(#{})'.format(anchor)


# Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav):
    """Write every page listed in the navigation into ``single_page_file``.

    Args:
        lang: Language code; pages are read from ``<docs_path>/<lang>``.
        docs_path: Root directory of the documentation sources.
        single_page_file: Writable text file object receiving the result.
            It is flushed, but not closed, before returning.
        nav: Nested dict/list structure whose string leaves are page paths
            relative to the language directory. It is replaced by the ``nav``
            key of ``<docs_path>/toc_<lang>.yml`` when that file exists.

    Raises:
        AssertionError: If the navigation contains no pages.

    An ``IOError`` raised while processing a page is logged as a warning and
    the remaining pages are still processed.
    """
    lang_path = os.path.join(docs_path, lang)

    proj_config = f'{docs_path}/toc_{lang}.yml'
    if os.path.exists(proj_config):
        with open(proj_config, encoding='utf-8') as cfg_file:
            # NOTE(review): full_load can construct more than plain data;
            # safe_load would presumably suffice for a TOC file - confirm
            # before switching.
            nav = yaml.full_load(cfg_file.read())['nav']

    files_to_concatenate = list(recursive_values(nav))
    files_count = len(files_to_concatenate)
    logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
    logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
    assert files_count > 0, f'Empty single-page for {lang}'

    # (../anything) or (../anything#anchor) or (xyz-abc.md) or (xyz-abc.md#anchor)
    relative_link_regexp = re.compile(r'\((\.\./[^)#]+|[\w\-]+\.md)(?:#[^\)]*)?\)')

    for path in files_to_concatenate:
        try:
            # Read explicitly as UTF-8 so the result does not depend on the
            # locale of the machine running the build.
            with open(os.path.join(lang_path, path), encoding='utf-8') as f:
                # Insert a horizontal ruler. Then insert an anchor that we will link to.
                # Its name will be a path to the .md file.
                single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))

                in_metadata = False
                for line in f:
                    # Skip YAML metadata. Every line that is exactly '---'
                    # toggles the state, wherever it appears in the file.
                    if line == '---\n':
                        in_metadata = not in_metadata
                        continue

                    if in_metadata:
                        continue

                    # Increase the level of headers. This applies to every
                    # line starting with '#', not only to real headers.
                    if line.startswith('#'):
                        line = '#' + line

                    # Replace links within the docs.
                    line, replaced_count = relative_link_regexp.subn(
                        lambda match, path=path: replace_link(match, path),
                        line)

                    # If failed to replace the relative link, print to log
                    if replaced_count and '../' in line:
                        logging.info('Failed to resolve relative link:')
                        logging.info(path)
                        logging.info(line)

                    single_page_file.write(line)

        except IOError as e:
            logging.warning(str(e))

    single_page_file.flush()


def build_single_page_version(lang, args, nav, cfg):
    """Build the single-page documentation for one language.

    The pages listed in ``nav`` are concatenated into ``single.md`` inside the
    language's source directory. That file is then built with mkdocs twice:
    once for the published output (skipped when ``args.test_only`` is set) and
    once into a temporary directory that is used for the tests and,
    unless ``args.skip_pdf`` is set, for rendering a PDF with ``wkhtmltopdf``.

    Args:
        lang: Language code (sub-directory of ``args.docs_dir``).
        args: Options object; the attributes read here are ``docs_dir``,
            ``docs_output_dir``, ``test_only``, ``minify``,
            ``save_raw_single_page`` and ``skip_pdf``.
        nav: Navigation structure passed through to ``concatenate``.
        cfg: mkdocs configuration object; its ``extra`` data is modified in
            place and it is reloaded via ``load_dict`` for each build.

    Side effects: sets the ``SINGLE_PAGE`` environment variable and leaves
    ``single.md`` in the language's source directory.
    """
    logging.info(f'Building single page version for {lang}')
    os.environ['SINGLE_PAGE'] = '1'
    extra = cfg.data['extra']
    extra['single_page'] = True
    extra['is_amp'] = False

    # Computed up front because the PDF step needs it even when the HTML
    # build below is skipped (test_only); defining it only inside that branch
    # would raise a NameError later.
    single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')

    with open(os.path.join(args.docs_dir, lang, 'single.md'), 'w', encoding='utf-8') as single_md:
        # concatenate() flushes the file, so it can be copied while still open.
        concatenate(lang, args.docs_dir, single_md, nav)

        with util.temp_dir() as site_temp, util.temp_dir() as docs_temp:
            # Work on a copy of the sources that keeps single.md as the only
            # Markdown page.
            docs_src_lang = os.path.join(args.docs_dir, lang)
            docs_temp_lang = os.path.join(docs_temp, lang)
            shutil.copytree(docs_src_lang, docs_temp_lang)
            for root, _, filenames in os.walk(docs_temp_lang):
                for filename in filenames:
                    if filename != 'single.md' and filename.endswith('.md'):
                        os.unlink(os.path.join(root, filename))

            cfg.load_dict({
                'docs_dir': docs_temp_lang,
                'site_dir': site_temp,
                'extra': extra,
                'nav': [
                    {cfg.data.get('site_name'): 'single.md'}
                ]
            })

            if not args.test_only:
                mkdocs.commands.build.build(cfg)

                if os.path.exists(single_page_output_path):
                    shutil.rmtree(single_page_output_path)

                shutil.copytree(
                    os.path.join(site_temp, 'single'),
                    single_page_output_path
                )

                single_page_index_html = os.path.join(single_page_output_path, 'index.html')
                single_page_content_js = os.path.join(single_page_output_path, 'content.js')

                # The generated page must contain exactly two BREAK markers;
                # the part between them is moved into content.js.
                with open(single_page_index_html, 'r', encoding='utf-8') as f:
                    sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')

                with open(single_page_index_html, 'w', encoding='utf-8') as f:
                    f.write(sp_prefix)
                    f.write(sp_suffix)

                if args.minify:
                    import jsmin
                    sp_js = jsmin.jsmin(sp_js)

                with open(single_page_content_js, 'w', encoding='utf-8') as f:
                    f.write(sp_js)

            logging.info(f'Re-building single page for {lang} pdf/test')
            with util.temp_dir() as test_dir:
                extra['single_page'] = False
                cfg.load_dict({
                    'docs_dir': docs_temp_lang,
                    'site_dir': test_dir,
                    'extra': extra,
                    'nav': [
                        {cfg.data.get('site_name'): 'single.md'}
                    ]
                })
                mkdocs.commands.build.build(cfg)

                css_in = ' '.join(website.get_css_in(args))
                js_in = ' '.join(website.get_js_in(args))
                # NOTE(review): these stay shell strings because the entries
                # returned by website.get_css_in/get_js_in are interpolated
                # verbatim and are presumably already shell-quoted - verify
                # before converting to a non-shell form.
                subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
                subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)

                if args.save_raw_single_page:
                    shutil.copytree(test_dir, args.save_raw_single_page)

                logging.info(f'Running tests for {lang}')
                test.test_single_page(
                    os.path.join(test_dir, 'single', 'index.html'), lang)

                if not args.skip_pdf:
                    single_page_index_html = os.path.join(test_dir, 'single', 'index.html')
                    single_page_pdf = os.path.abspath(
                        os.path.join(single_page_output_path, f'clickhouse_{lang}.pdf')
                    )
                    # The output directory does not exist yet when the HTML
                    # build was skipped.
                    os.makedirs(single_page_output_path, exist_ok=True)

                    with open(single_page_index_html, 'r', encoding='utf-8') as f:
                        soup = bs4.BeautifulSoup(
                            f.read(),
                            features='html.parser'
                        )

                    # Point resources at the local build and drop any query
                    # string from script and link URLs. Tags without the
                    # attribute are left untouched.
                    soup_prefix = f'file://{test_dir}'
                    for img in soup.find_all('img'):
                        img_src = img.get('src')
                        if img_src and img_src.startswith('/'):
                            img['src'] = soup_prefix + img_src
                    for script in soup.find_all('script'):
                        script_src = script.get('src')
                        if script_src:
                            script['src'] = soup_prefix + script_src.split('?', 1)[0]
                    for link in soup.find_all('link'):
                        link_href = link.get('href')
                        if link_href is not None:
                            link['href'] = soup_prefix + link_href.split('?', 1)[0]

                    with open(single_page_index_html, 'w', encoding='utf-8') as f:
                        f.write(str(soup))

                    create_pdf_command = [
                        'wkhtmltopdf',
                        '--print-media-type',
                        '--log-level', 'warn',
                        single_page_index_html, single_page_pdf
                    ]

                    logging.info(' '.join(create_pdf_command))
                    # Pass the argument list directly so that paths containing
                    # spaces or shell metacharacters are handled correctly.
                    subprocess.check_call(create_pdf_command)

        logging.info(f'Finished building single page version for {lang}')