replace_url.py 1.0 KB
Newer Older
L
LaraStuStu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import re


def replace_url(slug, content, branch='master'):
    """Rewrite relative link and image targets to absolute GitHub URLs.

    Each line of ``content`` is scanned for Markdown images
    (``![alt](target)``), Markdown links (``[text](target)``), HTML
    ``<img ... src="target" ...>`` tags and HTML ``<a ... href="target" ...>``
    tags. Every relative ``target`` is replaced with
    ``https://github.com/{slug}/blob/{branch}/{target}``; targets ending in
    ``.jpg`` or ``.png`` additionally get ``?raw=true`` appended.

    Targets that are not repository-relative are left untouched: anything
    with a URI scheme (``https:``, ``mailto:``, ...), protocol-relative
    URLs (``//host/...``) and in-page anchors (``#section``).

    Args:
        slug: Path segment inserted right after ``https://github.com/``
            (e.g. ``'owner/repo'``).
        content: Text to process.
        branch: Branch name inserted after ``/blob/``. Defaults to
            ``'master'``.

    Returns:
        The rewritten text. Lines are re-joined with ``'\\n'``, so original
        line terminators are normalised and a trailing newline is not
        preserved.
    """
    # Compiled once per call instead of rebuilding the list for every line.
    # The generic link pattern also matches the ``[alt](target)`` part of an
    # image; by then the target has already been made absolute, so the
    # second pass leaves it alone.
    patterns = [
        re.compile(pattern) for pattern in (
            r'!\[.*?\]\((.*?)\)',
            r'<img.*?src="(.*?)".*?>',
            r'\[.*?\]\((.*?)\)',
            r'<a.*?href="(.*?)".*?>',
        )
    ]
    # URI scheme ("https:", "mailto:", ...), protocol-relative URL, or anchor.
    non_relative = re.compile(r'(?:[A-Za-z][A-Za-z0-9+.-]*:|//|#)')
    image_suffixes = ('.jpg', '.png')

    def repl(match):
        url = match.group(1)
        if non_relative.match(url):
            return match.group(0)

        url_new = (
            'https://github.com/{slug}/blob/{branch}/{url}'
            .format(slug=slug, branch=branch, url=url)
        )
        # A real suffix test: the previous character-class regex matched any
        # URL whose last character was one of ". j p g | n".
        if url.endswith(image_suffixes):
            url_new += '?raw=true'

        # Splice the new URL over group 1 inside the matched text, using
        # offsets relative to the start of the whole match.
        whole = match.group(0)
        base = match.start(0)
        head = whole[:match.start(1) - base]
        tail = whole[match.end(1) - base:]
        return head + url_new + tail

    lines = []
    for line in content.splitlines():
        for pattern in patterns:
            line = pattern.sub(repl, line)
        lines.append(line)
    return '\n'.join(lines)