community.py 4.2 KB
Newer Older
ToTensor's avatar
ToTensor 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
import os
import json
import html
import requests
import logging

logger = logging.getLogger(__name__)


def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of files under a directory tree, filtered by suffix.

    Args:
        file_dir: Directory to walk recursively.
        filetype: File extension to keep, including the leading dot
            (compared against ``os.path.splitext(name)[1]``). Pass ``None``
            to keep every file.

    Returns:
        A list of paths built by joining each walked directory with the
        matching file name. Paths are only absolute if ``file_dir`` is.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
        if filetype is None or os.path.splitext(name)[1] == filetype
    ]


ToTensor's avatar
ToTensor 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
def get_all_files(current_address):
    """Return the path of every file found beneath ``current_address``.

    The tree is walked recursively with ``os.walk``; each result is the
    containing directory joined with the file name. Directories themselves
    are not included in the result.
    """
    collected = []
    for folder, _subfolders, names in os.walk(current_address):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected


ToTensor's avatar
ToTensor 已提交
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
def post(url, params, retry=3, headers=None):
    """POST ``params`` to ``url`` with retries and return the decoded JSON body.

    Args:
        url: Target URL.
        params: Request payload. When ``headers`` is None it is serialized
            with ``json.dumps``; otherwise it is handed to ``requests.post``
            unchanged.
        retry: Maximum number of attempts. With ``retry <= 0`` no request is
            made and None is returned.
        headers: Optional request headers. Defaults to a JSON content type.

    Returns:
        ``resp.json()`` of the first truthy response, or None when the last
        attempt produced a falsy response (requests treats HTTP status >= 400
        as falsy) or no attempt was made.

    Raises:
        Exception: The error raised by the final attempt (connection error,
            timeout, invalid JSON body, ...) once all attempts are used up.
        TypeError, ValueError: Immediately, if ``params`` cannot be
            JSON-serialized; retrying cannot fix that.
    """
    # The payload does not change between attempts, so build it once.
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
        data = json.dumps(params)
    else:
        hdrs = headers
        data = params

    fails = 0
    while fails < retry:
        try:
            logger.debug("will post %s to %s", data, url)
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            if resp:
                logger.info("resp %s", resp.content)
                return resp.json()
            logger.error("resp: [%s]", resp)
            fails += 1
        except Exception as error:
            logger.error("post %s to %s failed %s", params, url, error)
            fails += 1
            # The previous guard (`fails > retry`) could never be true inside
            # this loop, so the final error was swallowed and None returned.
            if fails >= retry:
                raise
    return None


def _save_chapter_mapping(mapping_path, chapter_code_mapping):
    """Write the file-path -> topic-URL mapping as indented JSON."""
    # Serialize first so a serialization failure cannot truncate the file.
    save_mapping = json.dumps(chapter_code_mapping,
                              ensure_ascii=False,
                              indent=2)
    # The JSON keeps non-ASCII characters verbatim (ensure_ascii=False), so
    # the encoding is pinned instead of relying on the platform default.
    with open(mapping_path, 'w', encoding='utf-8') as f:
        f.write(save_mapping)


def _load_chapter_mapping(mapping_path):
    """Read the file-path -> topic-URL mapping, creating an empty file if absent."""
    if not os.path.exists(mapping_path):
        _save_chapter_mapping(mapping_path, {})
    with open(mapping_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def send_topic(web_url):
    """Create or update one community topic per file under the book directory.

    Every file below ``data/深入剖析Nginx/`` is posted to the sendTopic
    endpoint as a topic whose body is an HTML link to ``web_url + file``.
    The topic URL returned for a newly created topic is stored in
    ``data/深入剖析Nginx.json`` (saved after each creation). Files already
    present in that mapping are re-posted with the ``id`` taken from the
    last path segment of their stored URL.

    Args:
        web_url: Prefix prepended to each file path to build the link target.

    Raises:
        Whatever ``post`` raises, plus ``ValueError`` if a stored topic URL
        does not end in an integer id. A missing or malformed response for a
        new topic is logged and that file is skipped, so it is retried on
        the next run.
    """
    book_dir = 'data/深入剖析Nginx/'
    mapping_path = 'data/深入剖析Nginx.json'
    # Example web_url:
    # "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'

    files = get_all_files(book_dir)
    print(files)

    chapter_code_mapping = _load_chapter_mapping(mapping_path)

    for file in files:
        # Title is the path relative to the book dir, with '/' and ' '
        # swapped for '|' and '.'.
        topic_title = file.replace(book_dir, '')
        topic_title = topic_title.replace('/', '|')
        topic_title = topic_title.replace(' ', '.')
        topic_content = "代码:<a href=\"{}\">{}</a>".format(
            web_url + file, topic_title)

        print(topic_title)

        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20966,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3821,
            "loginUserName": "BBS_Assistant",
            "bizNo": "ebook"
        }

        existing_link = chapter_code_mapping.get(file)
        if existing_link is None:
            resp = post(request_url, send_topic_request_param)
            try:
                topic_link = resp['data']['content']['url']
            except (KeyError, TypeError):
                # post() may return None or the payload may lack the URL.
                logger.error("no topic url in response for %s: %r", file, resp)
                continue
            chapter_code_mapping[file] = topic_link
            print('{}:{}'.format(file, topic_link))
            # Persist immediately so progress survives a later failure.
            _save_chapter_mapping(mapping_path, chapter_code_mapping)
        else:
            # The topic id is the last path segment of the stored URL.
            send_topic_request_param['id'] = int(existing_link.split('/')[-1])
            post(request_url, send_topic_request_param)
            print('{}:{}'.format(file, existing_link))