# Origin: this module was carried in a patch that adds ``pdf_tools.py`` together
# with a binary ``pdf_tools.exe``; the binary is not reproduced here.
"""Small GUI toolbox for common PDF chores: split, merge, bookmarks, watermark.

The PDF helpers are plain functions and can be called without the GUI, but
importing this module still builds the PySimpleGUI window (kept at module
level so existing importers that rely on ``layout`` / ``window`` keep working).
"""
import math
import os
import re
import subprocess as sub
import sys
import tempfile
import warnings
from glob import escape as glob_escape
from glob import glob

import PySimpleGUI as sg
from PyPDF2 import PdfFileReader, PdfFileWriter

from pikepdf import Pdf, Rectangle
from reportlab.lib import units
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

warnings.filterwarnings("ignore")

# Raw string: the backslashes are path separators, not escape sequences.
DEFAULT_FONT_FILE = r"C:\Windows\Fonts\msyh.ttc"


def pdf_add_water_mark(pdf_file, text, overwrite=False, col=2, row=3, font_file=DEFAULT_FONT_FILE,
                       font_szie=35, rotate=45, alpha=0.3):
    """Tile a rotated, semi-transparent text watermark over every page.

    Args:
        pdf_file: Path of the PDF to stamp.
        text: Watermark text.
        overwrite: When true, save over ``pdf_file``; otherwise write a
            sibling file whose stem ends with ``_带水印``.
        col: Number of watermark columns per page.
        row: Number of watermark rows per page.
        font_file: TrueType font used to draw ``text``.
        font_szie: Font size (parameter name kept as-is for callers).
        rotate: Rotation of the text in degrees.
        alpha: Fill opacity; smaller is more transparent.

    Returns:
        The path of the file that was written.
    """
    pdfmetrics.registerFont(TTFont('msyh', font_file))  # load the CJK-capable font

    if overwrite:
        result_file = pdf_file
    else:
        # splitext only touches the real extension, so "x.PDF" or a path
        # containing ".pdf" elsewhere no longer ends up overwriting the input.
        stem, ext = os.path.splitext(pdf_file)
        result_file = f"{stem}_带水印{ext or '.pdf'}"

    # Build the single-page stamp in a private temp directory so nothing is
    # left behind in (or required to be writable in) the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        stamp_path = os.path.join(tmp_dir, "watermark.pdf")
        c = canvas.Canvas(stamp_path, pagesize=(200 * units.mm, 200 * units.mm))
        c.translate(20 * units.mm, 20 * units.mm)
        c.rotate(rotate)
        c.setFont('msyh', font_szie)
        c.setStrokeColorRGB(0, 0, 0)
        c.setFillColorRGB(0, 0, 0)
        c.setFillAlpha(alpha)
        c.drawString(0, 0, text)
        c.save()

        with Pdf.open(stamp_path) as water_mark_pdf, \
                Pdf.open(pdf_file, allow_overwriting_input=True) as target:
            water_mark = water_mark_pdf.pages[0]
            for page in target.pages:
                # The trim box is (x0, y0, x1, y1); honour a non-zero origin.
                x0, y0, x1, y1 = (float(v) for v in page.trimbox)
                cell_w = (x1 - x0) / col
                cell_h = (y1 - y0) / row
                for x in range(col):
                    for y in range(row):
                        page.add_overlay(water_mark,
                                         Rectangle(x0 + cell_w * x,
                                                   y0 + cell_h * y,
                                                   x0 + cell_w * (x + 1),
                                                   y0 + cell_h * (y + 1)))
            if overwrite:
                target.save()  # no filename: write back over the input
            else:
                target.save(result_file)
    return result_file


def write_pdf(save_filepath, pdf_writer):
    """Write the contents of ``pdf_writer`` to ``save_filepath``."""
    with open(save_filepath, "wb") as out:
        pdf_writer.write(out)


def split_pdf(filename, output, n=3):
    """Split ``filename`` into files of at most ``n`` pages each.

    Chunks are written to ``output`` as ``<name>_1.pdf``, ``<name>_2.pdf``, ...
    The trailing partial chunk gets the next index of its own instead of
    reusing (and overwriting) the last full chunk's file name.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("拆分页数必须是正整数")
    basename = os.path.basename(filename)
    name, _ext = os.path.splitext(basename)
    pdf_reader = PdfFileReader(filename)
    os.makedirs(output, exist_ok=True)
    page_num = len(pdf_reader.pages)
    print(f"pdf共{page_num}页,要写出{math.ceil(page_num / n)}页")

    pdf_writer = PdfFileWriter()
    pending = 0       # pages added to the current writer
    chunk_index = 0   # 1-based index of the last chunk written

    def flush(writer):
        output_name = f"{output}/{name}_{chunk_index}.pdf"
        write_pdf(output_name, writer)
        print(output_name, "写出完成")

    for page in pdf_reader.pages:
        page.compressContentStreams()
        pdf_writer.addPage(page)
        pending += 1
        if pending == n:
            chunk_index += 1
            flush(pdf_writer)
            pdf_writer = PdfFileWriter()
            pending = 0
    if pending:
        chunk_index += 1
        flush(pdf_writer)
    print("拆分完成!")


def get_files(dir_path):
    """Return the ``*.pdf`` files directly inside ``dir_path`` in natural order.

    Natural order means embedded numbers compare numerically, so ``2.pdf``
    sorts before ``10.pdf``.
    """
    # Escape the directory so names containing glob metacharacters
    # (e.g. "[1]") are matched literally.
    files = glob(f"{glob_escape(dir_path)}/*.pdf")
    # Padding with "a" and "0" guarantees every path yields (text, number) pairs.
    files.sort(key=lambda path: [(text, int(num))
                                 for text, num in re.findall(r'(\D+)(\d+)', f'a{path}0')])
    return files


def merge_pdf(dir_path, out_name, add_bookmark=True):
    """Merge every PDF in ``dir_path`` into ``out_name``.

    Args:
        dir_path: Directory whose ``*.pdf`` files are merged in natural order.
        out_name: Path of the merged PDF.
        add_bookmark: When true, add one top-level bookmark per input file,
            titled with the file name without its extension.
    """
    # Only trailing separators are dropped; stripping leading ones would turn
    # an absolute POSIX path into a relative one.
    dir_path = dir_path.rstrip("\\/")
    target = os.path.normcase(os.path.abspath(out_name))
    pdf_writer = PdfFileWriter()
    output_pages = 0
    for pdf_path in get_files(dir_path):
        if os.path.normcase(os.path.abspath(pdf_path)) == target:
            # A previous output living in the input directory must not be
            # folded into the new output.
            continue
        pdf_reader = PdfFileReader(pdf_path)
        pdf_file = os.path.basename(pdf_path)
        pageCount = pdf_reader.getNumPages()
        first_page = output_pages
        output_pages += pageCount
        print(pdf_file, "页数:", pageCount)
        for page in pdf_reader.pages:
            page.compressContentStreams()
            pdf_writer.addPage(page)
        if add_bookmark:
            pdf_writer.addBookmark(os.path.splitext(pdf_file)[0], first_page, parent=None)
    print("总页数:", output_pages)
    print("开始写出到文件")
    write_pdf(out_name, pdf_writer)
    print("PDF文件合并完成")


def get_pdf_Bookmark(filename):
    """Extract the outline of a PDF as ``(level, title, page_index)`` tuples.

    Author's CSDN blog: https://blog.csdn.net/as604049322

    Args:
        filename: A path, or an already constructed ``PdfFileReader``.

    Returns:
        A list of ``(level, title, page_index)`` where ``level`` is the nesting
        depth (0 for top level) and ``page_index`` is zero-based. Entries whose
        target page cannot be resolved are skipped with a printed notice.
    """
    if isinstance(filename, str):
        pdf_reader = PdfFileReader(filename)
    else:
        pdf_reader = filename
    # Map each page object's id to its zero-based index.
    idnum2pagenum = {}
    for i in range(pdf_reader.getNumPages()):
        page = pdf_reader.getPage(i)
        idnum2pagenum[page.indirectRef.idnum] = i

    bookmark = []

    def collect(outlines, tab=0):
        for outline in outlines:
            if isinstance(outline, list):
                # A nested list holds the children of the preceding entry.
                collect(outline, tab + 1)
                continue
            page_index = idnum2pagenum.get(getattr(outline.page, "idnum", None))
            if page_index is None:
                print("跳过无法定位页码的书签:", outline['/Title'])
                continue
            bookmark.append((tab, outline['/Title'], page_index))

    collect(pdf_reader.getOutlines())
    return bookmark


def read_bookmark_from_file(filename="bookmark.txt", diff=0):
    """Parse a bookmark text file into ``(level, title, page_index)`` tuples.

    Each non-blank line is ``<tabs><title>\\t<page number>``; the number of
    leading tabs is the nesting level and the page number is one-based.

    Args:
        filename: Bookmark file; its encoding is auto-detected with cchardet.
        diff: Offset added to every page index.
    """
    import cchardet

    with open(filename, "rb") as f:
        raw = f.read()
    # Detection yields None for empty/undecidable input; fall back to the
    # encoding write_bookmark2file produces.
    encoding = cchardet.detect(raw)['encoding'] or "gbk"
    content = raw.decode(encoding)

    bookmark = []
    for line in content.splitlines():
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        l2 = line.rfind("\t")         # tab separating title and page number
        l1 = line.rfind("\t", 0, l2)  # last indentation tab, -1 if none
        bookmark.append((l1 + 1, line[l1 + 1:l2], int(line[l2 + 1:]) - 1 + diff))
    return bookmark


def write_bookmark2file(bookmark, filename="bookmark.txt"):
    """Write ``(level, title, page_index)`` tuples to a GBK-encoded text file.

    Each line is indented with one tab per level and ends with the one-based
    page number, the format read_bookmark_from_file parses.
    """
    with open(filename, "w", encoding="gbk") as f:
        for tab, title, pagenum in bookmark:
            prefix = "\t" * tab
            f.write(f"{prefix}{title}\t{pagenum + 1}\n")
    print("书签已经写出到文件", filename)


def pdf_write_bookmark(bookmark, pdf_file, compress):
    """Rewrite ``pdf_file`` in place with the given outline.

    Args:
        bookmark: Iterable of ``(level, title, page_index)`` tuples.
        pdf_file: PDF to rewrite.
        compress: When true, compress each page's content streams.

    Entries whose page index falls outside the document are skipped.
    """
    bookmark = list(bookmark)
    pdf_reader = PdfFileReader(pdf_file)
    num_pages = pdf_reader.getNumPages()
    pdf_writer = PdfFileWriter()
    for page in pdf_reader.pages:
        if compress:
            page.compressContentStreams()
        pdf_writer.addPage(page)

    # last_cache[level] holds the most recent bookmark added at that level so
    # deeper entries can attach to it as their parent.
    deepest = max((tab for tab, _title, _page in bookmark), default=0)
    last_cache = [None] * (deepest + 1)
    for tab, title, pagenum in bookmark:
        # A negative index would silently wrap around to a page at the end.
        if not 0 <= pagenum < num_pages:
            continue
        parent = last_cache[tab - 1] if tab > 0 else None
        last_cache[tab] = pdf_writer.addBookmark(title, pagenum, parent=parent)
    pdf_writer.setPageMode("/UseOutlines")
    write_pdf(pdf_file, pdf_writer)
    print("已成功将书签写入到", pdf_file)


sg.change_look_and_feel("Python")

LABEL_FONT = ("楷体", 12)
PDF_FILE_TYPES = (("PDF Files", "*.pdf"),)

# Window layout: one tab per tool, then a shared output console and options.
layout = [
    [sg.TabGroup([[
        sg.Tab('拆分', [
            [sg.Text('pdf文件地址:', font=LABEL_FONT),
             sg.In(size=(32, 1), key="filename"),
             sg.FileBrowse('...', target='filename', file_types=PDF_FILE_TYPES, initial_folder=".")],
            [sg.Text('拆分页数:', font=LABEL_FONT),
             sg.In(size=(3, 1), key="n", default_text="5"),
             sg.Button('开始拆分'),
             sg.Button('打开输出目录'),
             ],
            [sg.Text('输出目录:', font=LABEL_FONT),
             sg.In(size=(35, 1), default_text="./output", key="save_dir"),
             sg.FolderBrowse('...', target='save_dir', initial_folder="."),
             ],
        ]),
        sg.Tab('合并', [
            [sg.Text('pdf文件目录:', font=LABEL_FONT),
             sg.In(size=(32, 1), key="pdf_dir"),
             sg.FolderBrowse('...', target='pdf_dir', initial_folder=".")],
            [sg.Checkbox('添加文件名作为书签', key="add_bookmark", default=True),
             sg.Button('开始合并'),
             sg.Button('定位输出位置'),
             ],
            [sg.Text('输出位置:', font=LABEL_FONT),
             sg.In(size=(35, 1), default_text="./合并.pdf", key="out_name"),
             sg.FileSaveAs('...', target='out_name', file_types=PDF_FILE_TYPES, initial_folder="."),
             ],
        ]),
        sg.Tab('书签', [
            [sg.Text('pdf文件地址:', font=LABEL_FONT), sg.In(size=(32, 1), key="pdf_file"),
             sg.FileBrowse('...', target='pdf_file', file_types=PDF_FILE_TYPES, initial_folder=".")],
            [sg.Button('提取书签'),
             sg.Button('定位书签位置'),
             ],
            [sg.Text('书签位置:', font=LABEL_FONT),
             sg.In(size=(35, 1), default_text="./bookmark.txt", key="bookmark_name"),
             sg.FileSaveAs('...', target='bookmark_name', file_types=(("书签文件", "*.txt"),), initial_folder="."),
             ],
            [sg.Checkbox('压缩', key="compress", default=False),
             sg.Text('偏移:'), sg.In(size=(3, 1), default_text="0", key="diff"),
             sg.Button('写入书签'), ]
        ]),
        sg.Tab('水印', [
            [sg.Text('pdf文件地址:', font=LABEL_FONT),
             sg.In(size=(32, 1), key="pdf_file2"),
             sg.FileBrowse('...', target='pdf_file2', file_types=PDF_FILE_TYPES, initial_folder=".")],
            [
                sg.Text('行:'), sg.In(size=(2, 1), default_text="3", key="row"),
                sg.Text('列:'), sg.In(size=(2, 1), default_text="2", key="col"),
                sg.Text('旋转:'), sg.In(size=(2, 1), default_text="45", key="rotate"),
                sg.Text('透明度:'), sg.In(size=(3, 1), default_text="0.3", key="alpha"),
                sg.Checkbox('覆盖', key="overwrite", default=False),
            ],
            [
                sg.Text('字体:', font=LABEL_FONT),
                sg.In(size=(26, 1), key="font_file", default_text=DEFAULT_FONT_FILE),
                sg.FileBrowse('...', target='font_file', initial_folder="."),
                sg.Text('大小:'), sg.In(size=(3, 1), default_text="35", key="font_szie"),
            ],
            [sg.Text('水印内容:', font=LABEL_FONT), sg.In(size=(24, 1), key="text"),
             sg.Button('添加水印'),
             ],
        ]),
    ]])],

    [sg.Output(size=(53, 7), key="out", text_color="#15d36a")],
    [
        sg.Checkbox('处理完毕后定位输出目录', key="open_folder", default=True),
        sg.Button('清空输出'),
    ],
    [sg.Text("@小小明:https://blog.csdn.net/as604049322"), ],
]


def resource_path(relative_path):
    """Resolve a bundled resource next to this file, or under ``sys._MEIPASS`` when that attribute is set."""
    base_path = getattr(sys, '_MEIPASS', os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(base_path, relative_path)


title = 'PDF常用工具箱 v0.2'
window = sg.Window(title, layout, icon=resource_path("./pdf.ico"))
window.finalize()


def _open_in_explorer(path):
    """Open ``path`` in Windows Explorer without going through a shell."""
    # An argument list lets subprocess quote paths that contain spaces.
    sub.Popen(["explorer", path], shell=False)


def handle_event(event, values):
    """Run the action bound to one GUI ``event`` using the current ``values``."""
    if event == "开始拆分":
        save_dir = os.path.abspath(values['save_dir'])
        os.makedirs(save_dir, exist_ok=True)
        split_pdf(values["filename"], save_dir, int(values["n"]))
        window["save_dir"].Update(save_dir)
        if values["open_folder"]:
            _open_in_explorer(save_dir)
    elif event == "打开输出目录":
        save_dir = os.path.abspath(values['save_dir'])
        if os.path.exists(save_dir):
            _open_in_explorer(save_dir)
        else:
            sg.popup("输出目录不存在!")
            window["save_dir"].Update(os.path.abspath("./output"))
    elif event == "开始合并":
        pdf_dir = os.path.abspath(values['pdf_dir'])
        out_name = os.path.abspath(values['out_name'])
        merge_pdf(pdf_dir, out_name, values["add_bookmark"])
        window["out_name"].Update(out_name)
        if values["open_folder"]:
            _open_in_explorer(os.path.dirname(out_name))
    elif event == "定位输出位置":
        out_name = os.path.abspath(values['out_name'])
        _open_in_explorer(os.path.dirname(out_name))
    elif event == "提取书签":
        pdf_file = os.path.abspath(values['pdf_file'])
        bookmark_name = os.path.abspath(values['bookmark_name'])
        bookmark = get_pdf_Bookmark(pdf_file)
        window["bookmark_name"].Update(bookmark_name)
        write_bookmark2file(bookmark, filename=bookmark_name)
        if values["open_folder"]:
            _open_in_explorer(os.path.dirname(bookmark_name))
    elif event == "写入书签":
        pdf_file = os.path.abspath(values['pdf_file'])
        bookmark_name = os.path.abspath(values['bookmark_name'])
        bookmark = read_bookmark_from_file(bookmark_name, int(values["diff"]))
        window["bookmark_name"].Update(bookmark_name)
        pdf_write_bookmark(bookmark, pdf_file, values["compress"])
        if values["open_folder"]:
            _open_in_explorer(os.path.dirname(pdf_file))
    elif event == "定位书签位置":
        bookmark_name = os.path.abspath(values['bookmark_name'])
        _open_in_explorer(os.path.dirname(bookmark_name))
    # NOTE(review): the watermark button is present in the layout but its
    # handler was shipped commented out, so clicking it does nothing. Left
    # disabled here; confirm whether it should be enabled.
    # elif event == "添加水印":
    #     pdf_file = os.path.abspath(values['pdf_file2'])
    #     result_file = pdf_add_water_mark(pdf_file, values["text"], values["overwrite"],
    #                                      int(values["col"]), int(values["row"]),
    #                                      values["font_file"], int(values["font_szie"]),
    #                                      int(values["rotate"]), float(values["alpha"]))
    #     print("水印保存到", result_file)
    #     if values["open_folder"]:
    #         _open_in_explorer(os.path.dirname(result_file))
    elif event == "清空输出":
        window["out"].Update("")


def main():
    """Run the GUI event loop until the window is closed."""
    while True:
        event, values = window.read()
        if event is None:
            break  # window closed
        try:
            handle_event(event, values)
        except Exception as exc:  # top-level boundary: report and keep the GUI alive
            print(f"操作失败: {exc}")
    window.close()


if __name__ == '__main__':
    main()