"""Small PySimpleGUI toolbox for common PDF chores.

The window offers four tabs: split a PDF into fixed-size chunks, merge a
directory of PDFs, export/import bookmarks (outlines) through a tab-indented
text file, and stamp a text watermark on every page.
"""
import math
import os
import re
import subprocess as sub
import sys
import warnings
from glob import glob

import PySimpleGUI as sg
from PyPDF2 import PdfFileReader, PdfFileWriter
from pikepdf import Pdf, Rectangle
from reportlab.lib import units
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

warnings.filterwarnings("ignore")


def pdf_add_water_mark(pdf_file, text, overwrite=False, col=2, row=3,
                       font_file=r"C:\Windows\Fonts\msyh.ttc", font_szie=35,
                       rotate=45, alpha=0.3):
    """Overlay a grid of text watermarks on every page of ``pdf_file``.

    A one-page ``watermark.pdf`` is rendered in the current working directory
    with reportlab and then overlaid ``col`` x ``row`` times per page with
    pikepdf.

    Args:
        pdf_file: Path of the PDF to watermark.
        text: Watermark text.
        overwrite: When true, save over ``pdf_file``; otherwise save next to
            it with ``_带水印`` appended to the file name.
        col: Number of watermark columns per page.
        row: Number of watermark rows per page.
        font_file: TrueType font used for the text.
        font_szie: Font size (the misspelled name is kept for callers).
        rotate: Rotation of the text in degrees.
        alpha: Fill opacity; smaller values are more transparent.

    Returns:
        The path of the file that was written.
    """
    pdfmetrics.registerFont(TTFont('msyh', font_file))  # register the font
    c = canvas.Canvas("watermark.pdf", pagesize=(200 * units.mm, 200 * units.mm))
    c.translate(20 * units.mm, 20 * units.mm)
    c.rotate(rotate)  # rotate the watermark text
    c.setFont('msyh', font_szie)  # font size
    c.setStrokeColorRGB(0, 0, 0)  # stroke colour
    c.setFillColorRGB(0, 0, 0)  # fill colour
    c.setFillAlpha(alpha)  # opacity; smaller is more transparent
    c.drawString(0, 0, text)
    c.save()

    water_mark_pdf = Pdf.open("watermark.pdf")
    water_mark = water_mark_pdf.pages[0]
    target = Pdf.open(pdf_file, allow_overwriting_input=True)
    for page in target.pages:
        # NOTE(review): treats the last two trimbox values as width/height,
        # i.e. presumably assumes the box starts at the origin - confirm.
        _, _, w, h = page.trimbox
        for x in range(col):  # watermark columns per row
            for y in range(row):  # watermark rows per page
                page.add_overlay(
                    water_mark,
                    Rectangle(w * x / col, h * y / row,
                              w * (x + 1) / col, h * (y + 1) / row))

    if overwrite:
        # Saving without a file name writes back to the input file.
        file = None
    else:
        # splitext only touches the real extension, so directories containing
        # ".pdf" and upper-case ".PDF" extensions are handled correctly.
        root, ext = os.path.splitext(pdf_file)
        file = f"{root}_带水印{ext}"
    target.save(file)
    return file or pdf_file


def write_pdf(save_filepath, pdf_writer):
    """Write the contents of ``pdf_writer`` to ``save_filepath``."""
    with open(save_filepath, "wb") as out:
        pdf_writer.write(out)


def split_pdf(filename, output, n=3):
    """Split ``filename`` into consecutive PDFs of at most ``n`` pages each.

    The parts are written to ``output`` (created if missing) as
    ``<name>_1.pdf``, ``<name>_2.pdf`` and so on.

    Args:
        filename: Path of the PDF to split.
        output: Directory that receives the parts.
        n: Maximum number of pages per part.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    basename = os.path.basename(filename)
    name, ext = os.path.splitext(basename)
    pdf_reader = PdfFileReader(filename)
    pdf_writer = PdfFileWriter()
    os.makedirs(output, exist_ok=True)
    page_num = len(pdf_reader.pages)
    print(f"pdf共{page_num}页,要写出{math.ceil(page_num / n)}页")
    for i, page in enumerate(pdf_reader.pages, 1):
        page.compressContentStreams()
        pdf_writer.addPage(page)
        if i % n == 0:
            output_name = f"{output}/{name}_{i // n}.pdf"
            write_pdf(output_name, pdf_writer)
            print(output_name, "写出完成")
            pdf_writer = PdfFileWriter()
    if page_num % n != 0:
        # The trailing partial chunk gets the NEXT index; reusing
        # ``page_num // n`` would overwrite the last full chunk.
        output_name = f"{output}/{name}_{page_num // n + 1}.pdf"
        write_pdf(output_name, pdf_writer)
        print(output_name, "写出完成")
    print("拆分完成!")


def _natural_sort_key(path):
    """Return a sort key that orders digit runs in ``path`` numerically."""
    # The 'a' prefix and '0' suffix guarantee that every character belongs to
    # some (non-digits, digits) pair matched by the pattern.
    return [(text, int(number))
            for text, number in re.findall(r'(\D+)(\d+)', f'a{path}0')]


def get_files(dir_path):
    """Return the ``*.pdf`` files directly inside ``dir_path`` in natural order."""
    files = glob(f"{dir_path}/*.pdf")
    files.sort(key=_natural_sort_key)
    return files


def merge_pdf(dir_path, out_name, add_bookmark=True):
    """Merge every PDF in ``dir_path`` into the single file ``out_name``.

    Args:
        dir_path: Directory containing the PDFs to merge.
        out_name: Path of the merged PDF to write.
        add_bookmark: When true, add one top-level bookmark per input file,
            titled with the file name without its extension and pointing at
            that file's first page.
    """
    # Only trailing separators are removed; stripping leading ones would turn
    # an absolute POSIX or UNC path into a different, relative path.
    dir_path = dir_path.rstrip("\\/")
    files = get_files(dir_path)
    pdf_writer = PdfFileWriter()
    output_pages = 0
    for pdf_file in files:
        pdf_reader = PdfFileReader(pdf_file)
        pdf_file = os.path.basename(pdf_file)
        pageCount = pdf_reader.getNumPages()
        output_pages += pageCount
        print(pdf_file, "页数:", pageCount)
        for page in pdf_reader.pages:
            page.compressContentStreams()
            pdf_writer.addPage(page)
        if add_bookmark:
            # output_pages - pageCount is the index of this file's first page.
            pdf_writer.addBookmark(pdf_file[:pdf_file.rfind(".")],
                                   output_pages - pageCount, parent=None)
    print("总页数:", output_pages)
    print("开始写出到文件")
    with open(out_name, "wb") as outputfile:
        pdf_writer.write(outputfile)
    print("PDF文件合并完成")


def get_pdf_Bookmark(filename):
    """Extract the outline of a PDF as ``(level, title, page_index)`` tuples.

    Author (CSDN): https://blog.csdn.net/as604049322

    Args:
        filename: Either a path string or an already opened reader object.

    Returns:
        A list of ``(level, title, page_index)`` tuples in outline order;
        ``level`` is the nesting depth (0 for top level) and ``page_index``
        is zero-based.
    """
    if isinstance(filename, str):
        pdf_reader = PdfFileReader(filename)
    else:
        pdf_reader = filename
    pagecount = pdf_reader.getNumPages()
    # Map each page object's id number to its zero-based page index.
    idnum2pagenum = {}
    for i in range(pagecount):
        page = pdf_reader.getPage(i)
        idnum2pagenum[page.indirectRef.idnum] = i
    # Collected entries: nesting level, title and page index (page number - 1).
    bookmark = []

    def get_pdf_Bookmark_inter(outlines, tab=0):
        """Walk the outline; a nested list holds the children one level deeper."""
        for outline in outlines:
            if isinstance(outline, list):
                get_pdf_Bookmark_inter(outline, tab + 1)
            else:
                bookmark.append(
                    (tab, outline['/Title'], idnum2pagenum[outline.page.idnum]))

    outlines = pdf_reader.getOutlines()
    get_pdf_Bookmark_inter(outlines)
    return bookmark


def read_bookmark_from_file(filename="bookmark.txt", diff=0):
    """Parse a bookmark text file into ``(level, title, page_index)`` tuples.

    Each non-blank line has the form ``<tabs><title>\\t<page number>``. The
    level is the number of characters before the title, and the page number
    is one-based in the file.

    Args:
        filename: Path of the bookmark file; its encoding is auto-detected.
        diff: Offset added to every page index.

    Returns:
        A list of ``(level, title, page_index)`` tuples with zero-based page
        indices.
    """
    import cchardet

    bookmark = []
    with open(filename, "rb") as f:
        raw = f.read()
    # Detection can yield no encoding (e.g. for an empty file); use UTF-8 then.
    content = raw.decode(cchardet.detect(raw)['encoding'] or "utf-8")
    for line in content.splitlines():
        if not line.strip():
            continue  # blank lines carry no bookmark
        l2 = line.rfind("\t")  # tab separating the title from the page number
        l1 = line.rfind("\t", 0, l2)  # last indentation tab, -1 at top level
        bookmark.append((l1 + 1, line[l1 + 1:l2], int(line[l2 + 1:]) - 1 + diff))
    return bookmark


def write_bookmark2file(bookmark, filename="bookmark.txt"):
    """Write ``(level, title, page_index)`` tuples to a GBK-encoded text file.

    Each entry becomes one line: ``level`` tabs, the title, a tab and the
    one-based page number.
    """
    with open(filename, "w", encoding="gbk") as f:
        for tab, title, pagenum in bookmark:
            prefix = "\t" * tab
            f.write(f"{prefix}{title}\t{pagenum + 1}\n")
    print("书签已经写出到文件", filename)


def pdf_write_bookmark(bookmark, pdf_file, compress):
    """Rewrite ``pdf_file`` in place with the given bookmarks.

    Args:
        bookmark: Iterable of ``(level, title, page_index)`` tuples; entries
            whose page index is beyond the last page are skipped.
        pdf_file: Path of the PDF; it is read and then overwritten.
        compress: When true, compress each page's content streams.
    """
    pdf_reader = PdfFileReader(pdf_file)
    num_pages = pdf_reader.getNumPages()
    pdf_writer = PdfFileWriter()
    for page in pdf_reader.pages:
        if compress:
            page.compressContentStreams()
        pdf_writer.addPage(page)

    # last_cache[level] holds the most recent bookmark added at that level so
    # that deeper entries can use it as their parent. ``default`` keeps an
    # empty bookmark list from raising.
    deepest = max((entry[0] for entry in bookmark), default=-1)
    last_cache = [None] * (deepest + 1)
    for tab, title, pagenum in bookmark:
        if pagenum >= num_pages:
            continue
        parent = last_cache[tab - 1] if tab > 0 else None
        indirect_id = pdf_writer.addBookmark(title, pagenum, parent=parent)
        last_cache[tab] = indirect_id
    pdf_writer.setPageMode("/UseOutlines")
    with open(pdf_file, "wb") as out:
        pdf_writer.write(out)
    print("已成功将书签写入到", pdf_file)


sg.change_look_and_feel("Python")
# Window layout: one tab per tool, a shared output pane and global options.
layout = [
    [sg.TabGroup([[
        sg.Tab('拆分', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="filename"),
             sg.FileBrowse('...', target='filename',
                           file_types=(("PDF Files", "*.pdf"),),
                           initial_folder=".")],
            [sg.Text('拆分页数:', font=("楷体", 12)),
             sg.In(size=(3, 1), key="n", default_text="5"),
             sg.Button('开始拆分'),
             sg.Button('打开输出目录'),
             ],
            [sg.Text('输出目录:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./output", key="save_dir"),
             sg.FolderBrowse('...', target='save_dir', initial_folder="."),
             ],
        ]),
        sg.Tab('合并', [
            [sg.Text('pdf文件目录:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="pdf_dir"),
             sg.FolderBrowse('...', target='pdf_dir', initial_folder=".")],
            [sg.Checkbox('添加文件名作为书签', key="add_bookmark", default=True),
             sg.Button('开始合并'),
             sg.Button('定位输出位置'),
             ],
            [sg.Text('输出位置:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./合并.pdf", key="out_name"),
             sg.FileSaveAs('...', target='out_name',
                           file_types=(("PDF Files", "*.pdf"),),
                           initial_folder="."),
             ],
        ]),
        sg.Tab('书签', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="pdf_file"),
             sg.FileBrowse('...', target='pdf_file',
                           file_types=(("PDF Files", "*.pdf"),),
                           initial_folder=".")],
            [sg.Button('提取书签'),
             sg.Button('定位书签位置'),
             ],
            [sg.Text('书签位置:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./bookmark.txt", key="bookmark_name"),
             sg.FileSaveAs('...', target='bookmark_name',
                           file_types=(("书签文件", "*.txt"),),
                           initial_folder="."),
             ],
            [sg.Checkbox('压缩', key="compress", default=False),
             sg.Text('偏移:'),
             sg.In(size=(3, 1), default_text="0", key="diff"),
             sg.Button('写入书签'),
             ]
        ]),
        sg.Tab('水印', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="pdf_file2"),
             sg.FileBrowse('...', target='pdf_file2',
                           file_types=(("PDF Files", "*.pdf"),),
                           initial_folder=".")],
            [
                sg.Text('行:'),
                sg.In(size=(2, 1), default_text="3", key="row"),
                sg.Text('列:'),
                sg.In(size=(2, 1), default_text="2", key="col"),
                sg.Text('旋转:'),
                sg.In(size=(2, 1), default_text="45", key="rotate"),
                sg.Text('透明度:'),
                sg.In(size=(3, 1), default_text="0.3", key="alpha"),
                sg.Checkbox('覆盖', key="overwrite", default=False),
            ],
            [
                sg.Text('字体:', font=("楷体", 12)),
                sg.In(size=(26, 1), key="font_file",
                      default_text=r"C:\Windows\Fonts\msyh.ttc"),
                sg.FileBrowse('...', target='font_file', initial_folder="."),
                sg.Text('大小:'),
                sg.In(size=(3, 1), default_text="35", key="font_szie"),
            ],
            [sg.Text('水印内容:', font=("楷体", 12)),
             sg.In(size=(24, 1), key="text"),
             sg.Button('添加水印'),
             ],
        ]),
    ]])],
    [sg.Output(size=(53, 7), key="out", text_color="#15d36a")],
    [
        sg.Checkbox('处理完毕后定位输出目录', key="open_folder", default=True),
        sg.Button('清空输出'),
    ],
    [sg.Text("@小小明:https://blog.csdn.net/as604049322"), ],
]


def resource_path(relative_path):
    """Resolve ``relative_path`` against ``sys._MEIPASS`` or this file's directory.

    ``sys._MEIPASS`` is used when the attribute exists (presumably set by a
    frozen/bundled build - confirm); otherwise the directory containing this
    source file is used.
    """
    base_path = getattr(sys, '_MEIPASS', os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(base_path, relative_path)


def _open_in_explorer(path):
    """Open ``path`` with the ``explorer`` command (Windows file manager).

    The command is passed as an argument list so that paths containing
    spaces reach ``explorer`` as a single argument.
    """
    sub.Popen(["explorer", path], shell=False)


title = 'PDF常用工具箱 v0.2'
window = sg.Window(title, layout, icon=resource_path("./pdf.ico"))
window.finalize()

if __name__ == '__main__':
    while True:
        event, values = window.read()
        if event is None:
            break  # the window was closed
        elif event == "开始拆分":
            save_dir = os.path.abspath(values['save_dir'])
            os.makedirs(save_dir, exist_ok=True)
            split_pdf(values["filename"], save_dir, int(values["n"]))
            window["save_dir"].Update(save_dir)
            if values["open_folder"]:
                _open_in_explorer(save_dir)
        elif event == "打开输出目录":
            save_dir = os.path.abspath(values['save_dir'])
            if os.path.exists(save_dir):
                _open_in_explorer(save_dir)
            else:
                sg.popup("输出目录不存在!")
                window["save_dir"].Update(os.path.abspath("./output"))
        elif event == "开始合并":
            pdf_dir = os.path.abspath(values['pdf_dir'])
            out_name = os.path.abspath(values['out_name'])
            merge_pdf(pdf_dir, out_name, values["add_bookmark"])
            window["out_name"].Update(out_name)
            if values["open_folder"]:
                _open_in_explorer(os.path.dirname(out_name))
        elif event == "定位输出位置":
            out_name = os.path.abspath(values['out_name'])
            _open_in_explorer(os.path.dirname(out_name))
        elif event == "提取书签":
            pdf_file = os.path.abspath(values['pdf_file'])
            bookmark_name = os.path.abspath(values['bookmark_name'])
            bookmark = get_pdf_Bookmark(pdf_file)
            window["bookmark_name"].Update(bookmark_name)
            write_bookmark2file(bookmark, filename=bookmark_name)
            if values["open_folder"]:
                _open_in_explorer(os.path.dirname(bookmark_name))
        elif event == "写入书签":
            pdf_file = os.path.abspath(values['pdf_file'])
            bookmark_name = os.path.abspath(values['bookmark_name'])
            bookmark = read_bookmark_from_file(bookmark_name, int(values["diff"]))
            window["bookmark_name"].Update(bookmark_name)
            pdf_write_bookmark(bookmark, pdf_file, values["compress"])
            if values["open_folder"]:
                _open_in_explorer(os.path.dirname(pdf_file))
        elif event == "定位书签位置":
            bookmark_name = os.path.abspath(values['bookmark_name'])
            _open_in_explorer(os.path.dirname(bookmark_name))
        # NOTE(review): the watermark handler below is disabled in the source,
        # so the "添加水印" button currently does nothing; the reason is not
        # visible here - confirm before re-enabling.
        # elif event == "添加水印":
        #     pdf_file = os.path.abspath(values['pdf_file2'])
        #     result_file = pdf_add_water_mark(pdf_file, values["text"], values["overwrite"],
        #                                      int(values["col"]), int(values["row"]),
        #                                      values["font_file"], int(values["font_szie"]),
        #                                      int(values["rotate"]), float(values["alpha"]))
        #     print("水印保存到", result_file)
        #     if values["open_folder"]:
        #         sub.Popen(f"explorer {os.path.dirname(result_file)}", shell=False)
        elif event == "清空输出":
            window["out"].Update("")
    window.close()