提交 78dee21c 编写于 作者: 小小明-代码实体's avatar 小小明-代码实体

更新pdf_tools.py, pdf_tools.exe

上级 990b62a7
文件已添加
import math
import os
import re
import subprocess as sub
import sys
import warnings
from glob import glob
import PySimpleGUI as sg
from PyPDF2 import PdfFileReader, PdfFileWriter
from pikepdf import Pdf, Rectangle
from reportlab.lib import units
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
# Suppress every warning for the whole process: no category, module or
# message filter is given, so the "ignore" action applies to all of them.
warnings.filterwarnings("ignore")
def pdf_add_water_mark(pdf_file, text, overwrite=False, col=2, row=3, font_file=r"C:\Windows\Fonts\msyh.ttc",
                       font_szie=35, rotate=45, alpha=0.3):
    """Tile a rotated, semi-transparent text watermark over every page of a PDF.

    A one-page stamp PDF containing *text* is rendered with reportlab and then
    overlaid with pikepdf onto a ``col`` x ``row`` grid of cells on each page.

    Args:
        pdf_file: Path of the PDF to watermark.
        text: Watermark text to draw.
        overwrite: When true the input file is saved in place; otherwise the
            result is written next to it with ``_带水印`` appended to the stem.
        col: Number of watermark columns per page.
        row: Number of watermark rows per page.
        font_file: TrueType font used to draw *text*.
        font_szie: Font size in points (the misspelled name is kept because
            callers may pass it by keyword).
        rotate: Rotation of the text in degrees.
        alpha: Fill opacity; smaller values are more transparent.

    Returns:
        The path of the file that now contains the watermarked document.
    """
    import tempfile  # local import: only this function needs it

    pdfmetrics.registerFont(TTFont('msyh', font_file))  # register the font under the name 'msyh'

    if overwrite:
        file = None
    else:
        # Split off the real extension instead of str.replace(".pdf", ...):
        # replace() leaves "X.PDF" unchanged (which silently overwrote the
        # input) and rewrites every ".pdf" occurring anywhere in the path.
        stem, ext = os.path.splitext(pdf_file)
        file = f"{stem}_带水印{ext}"

    # Render the stamp inside a private temporary directory so that an
    # unrelated "watermark.pdf" in the working directory is never clobbered
    # and no scratch file is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        stamp_path = os.path.join(tmp_dir, "watermark.pdf")
        c = canvas.Canvas(stamp_path, pagesize=(200 * units.mm, 200 * units.mm))
        c.translate(20 * units.mm, 20 * units.mm)
        c.rotate(rotate)  # rotate the watermark text
        c.setFont('msyh', font_szie)  # font and size
        c.setStrokeColorRGB(0, 0, 0)  # stroke colour
        c.setFillColorRGB(0, 0, 0)  # fill colour
        c.setFillAlpha(alpha)  # opacity, smaller is more transparent
        c.drawString(0, 0, text)
        c.save()

        # Both documents stay open until target.save() has run (the stamp is
        # the source of the overlaid content) and are closed afterwards so the
        # temporary directory can be removed.
        with Pdf.open(stamp_path) as water_mark_pdf, \
                Pdf.open(pdf_file, allow_overwriting_input=True) as target:
            water_mark = water_mark_pdf.pages[0]
            for page in target.pages:
                # trimbox is (left, bottom, right, top); derive the real
                # width/height so boxes that do not start at (0, 0) are tiled
                # correctly.
                left, bottom, right, top = (float(v) for v in page.trimbox)
                cell_w = (right - left) / col
                cell_h = (top - bottom) / row
                for x in range(col):  # watermark columns
                    for y in range(row):  # watermark rows
                        page.add_overlay(water_mark,
                                         Rectangle(left + cell_w * x,
                                                   bottom + cell_h * y,
                                                   left + cell_w * (x + 1),
                                                   bottom + cell_h * (y + 1)))
            target.save(file)
    return file or pdf_file
# Select the PySimpleGUI look-and-feel named "Python"; the same call is
# issued again immediately before the layout is defined further down.
sg.change_look_and_feel("Python")
def write_pdf(save_filepath, pdf_writer):
    """Write *pdf_writer* to *save_filepath*.

    The file is opened in binary write mode (created or truncated) and handed
    to ``pdf_writer.write``; it is closed again even if writing fails.
    """
    stream = open(save_filepath, "wb")
    try:
        pdf_writer.write(stream)
    finally:
        stream.close()
def split_pdf(filename, output, n=3):
    """Split a PDF into consecutive files of at most *n* pages each.

    The parts are written to the directory *output* (created when missing) as
    ``<name>_1.pdf``, ``<name>_2.pdf``, ... where ``<name>`` is the input file
    name without its extension. Every page's content streams are compressed
    before being written.

    Args:
        filename: Path of the PDF to split.
        output: Directory that receives the parts.
        n: Maximum number of pages per part; must be at least 1.

    Raises:
        ValueError: If *n* is smaller than 1.
    """
    if n < 1:
        raise ValueError("n 必须是不小于 1 的整数")
    name = os.path.splitext(os.path.basename(filename))[0]
    pdf_reader = PdfFileReader(filename)
    os.makedirs(output, exist_ok=True)
    page_num = len(pdf_reader.pages)
    print(f"pdf共{page_num}页,要写出{math.ceil(page_num / n)}页")
    pdf_writer = PdfFileWriter()
    part = 0
    for i, page in enumerate(pdf_reader.pages, 1):
        page.compressContentStreams()
        pdf_writer.addPage(page)
        # Flush on every n-th page and on the last page. The trailing partial
        # chunk gets its own index: the previous numbering (i // n) reused the
        # index of the last full chunk and overwrote that file.
        if i % n == 0 or i == page_num:
            part += 1
            output_name = f"{output}/{name}_{part}.pdf"
            write_pdf(output_name, pdf_writer)
            print(output_name, "写出完成")
            pdf_writer = PdfFileWriter()
    print("拆分完成!")
def get_files(dir_path):
    """Return the ``*.pdf`` files directly inside *dir_path* in natural order.

    Natural order compares digit runs numerically, so ``a2.pdf`` sorts before
    ``a10.pdf``.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        A list of paths of the form ``<dir_path>/<file>.pdf``.
    """
    from glob import escape  # local import: the file only imports glob.glob

    def natural_key(path):
        # Prefixing 'a' and appending '0' guarantees the string starts with a
        # non-digit and ends with a digit, so the pattern splits all of it
        # into (text, number) pairs.
        return [(text, int(number))
                for text, number in re.findall(r'(\D+)(\d+)', f'a{path}0')]

    # Escape the directory so characters such as '[' or '*' in its name are
    # matched literally instead of being interpreted as glob syntax.
    return sorted(glob(f"{escape(dir_path)}/*.pdf"), key=natural_key)
def merge_pdf(dir_path, out_name, add_bookmark=True):
    """Merge every PDF found in *dir_path* into the single file *out_name*.

    Input files are taken in the order returned by ``get_files`` and each
    page's content streams are compressed before being appended.

    Args:
        dir_path: Directory containing the PDFs to merge.
        out_name: Path of the merged PDF to write.
        add_bookmark: When true, add one top-level bookmark per input file,
            titled with the file name without extension and pointing at that
            file's first page in the merged document.
    """
    # Only trailing separators are removed; stripping leading ones would turn
    # UNC paths and POSIX absolute paths into relative ones.
    dir_path = dir_path.rstrip("\\/")
    files = get_files(dir_path)
    target = os.path.normcase(os.path.abspath(out_name))
    pdf_writer = PdfFileWriter()
    output_pages = 0
    for pdf_path in files:
        pdf_file = os.path.basename(pdf_path)
        # A previous merge result stored in the input directory must not be
        # merged into its own replacement.
        if os.path.normcase(os.path.abspath(pdf_path)) == target:
            print(pdf_file, "是输出文件,已跳过")
            continue
        pdf_reader = PdfFileReader(pdf_path)
        pageCount = pdf_reader.getNumPages()
        print(pdf_file, "页数:", pageCount)
        for page in pdf_reader.pages:
            page.compressContentStreams()
            pdf_writer.addPage(page)
        # output_pages still holds the index of this file's first page; files
        # without pages get no bookmark because there is no page to point at.
        if add_bookmark and pageCount:
            pdf_writer.addBookmark(os.path.splitext(pdf_file)[0], output_pages, parent=None)
        output_pages += pageCount
    print("总页数:", output_pages)
    print("开始写出到文件")
    with open(out_name, "wb") as outputfile:
        pdf_writer.write(outputfile)
    print("PDF文件合并完成")
def get_pdf_Bookmark(filename):
    """Return the outline (bookmark) entries of a PDF as a flat list.

    Author's CSDN blog: https://blog.csdn.net/as604049322

    Args:
        filename: Either a path string, which is opened with ``PdfFileReader``,
            or an already constructed reader object, which is used as is.

    Returns:
        A list of ``(level, title, page_index)`` tuples in outline order.
        ``level`` is the nesting depth (0 for top-level entries) and
        ``page_index`` is zero-based (page number minus one).
    """
    pdf_reader = PdfFileReader(filename) if isinstance(filename, str) else filename

    # Map the indirect-reference id of every page object to its page index so
    # that an outline's destination page can be translated into a number.
    page_index_by_id = {
        pdf_reader.getPage(index).indirectRef.idnum: index
        for index in range(pdf_reader.getNumPages())
    }

    def walk(entries, depth):
        # A nested list holds the children of the preceding entry.
        for entry in entries:
            if isinstance(entry, list):
                yield from walk(entry, depth + 1)
            else:
                yield depth, entry['/Title'], page_index_by_id[entry.page.idnum]

    return list(walk(pdf_reader.getOutlines(), 0))
def _decode_bookmark_bytes(raw):
    """Decode the raw bytes of a bookmark file into text.

    The encoding reported by ``cchardet`` is tried first when that package is
    installed and reports one; ``utf-8-sig`` and ``gbk`` (the encoding used by
    ``write_bookmark2file``) are tried afterwards.
    """
    try:
        import cchardet
    except ImportError:
        cchardet = None
    candidates = []
    if cchardet is not None:
        detected = cchardet.detect(raw)['encoding']
        if detected:  # detection yields None for e.g. an empty file
            candidates.append(detected)
    candidates += ["utf-8-sig", "gbk"]
    for encoding in candidates:
        try:
            return raw.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
    raise ValueError("无法识别书签文件的编码")


def read_bookmark_from_file(filename="bookmark.txt", diff=0):
    """Parse a bookmark text file into ``(level, title, page_index)`` tuples.

    Each non-blank line has the form ``<tabs><title>\\t<page number>``: the
    number of leading tabs is the nesting level and the text after the last
    tab is the one-based page number. Blank lines are ignored.

    Args:
        filename: Path of the bookmark file.
        diff: Offset added to every page index.

    Returns:
        A list of ``(level, title, page_number - 1 + diff)`` tuples in file
        order.

    Raises:
        ValueError: If a line has no tab-separated page number, or the file
            cannot be decoded.
    """
    with open(filename, "rb") as f:
        raw = f.read()
    content = _decode_bookmark_bytes(raw)
    bookmark = []
    for lineno, line in enumerate(content.splitlines(), 1):
        if not line.strip():
            continue  # tolerate empty / trailing blank lines
        l2 = line.rfind("\t")  # tab in front of the page number
        l1 = line.rfind("\t", 0, l2)  # last indentation tab, -1 when there is none
        try:
            if l2 < 0:
                raise ValueError(line)
            pagenum = int(line[l2 + 1:]) - 1 + diff
        except ValueError:
            raise ValueError(f"书签文件第{lineno}行格式错误: {line!r}") from None
        bookmark.append((l1 + 1, line[l1 + 1:l2], pagenum))
    return bookmark
def write_bookmark2file(bookmark, filename="bookmark.txt", encoding="gbk"):
    """Write bookmark tuples to a text file, one entry per line.

    Each ``(level, title, page_index)`` tuple becomes ``level`` tab
    characters, the title, a tab and the one-based page number
    (``page_index + 1``).

    Args:
        bookmark: Iterable of ``(level, title, page_index)`` tuples.
        filename: Path of the text file to create or overwrite.
        encoding: Text encoding of the file. Defaults to ``gbk`` as before;
            pass e.g. ``"utf-8"`` for titles that GBK cannot represent.
    """
    lines = ("\t" * tab + f"{title}\t{pagenum + 1}\n" for tab, title, pagenum in bookmark)
    with open(filename, "w", encoding=encoding) as f:
        f.writelines(lines)
    print("书签已经写出到文件", filename)
def pdf_write_bookmark(bookmark, pdf_file, compress):
    """Rewrite *pdf_file* in place with the given bookmarks attached.

    All pages of the file are copied into a new writer, the bookmarks are
    added, the page mode is set to ``/UseOutlines`` and the result is written
    back to the same path.

    Args:
        bookmark: Sequence of ``(level, title, page_index)`` tuples with a
            zero-based page index. Entries whose page index lies outside the
            document are skipped.
        pdf_file: Path of the PDF to read and overwrite.
        compress: When true, compress each page's content streams first.
    """
    pdf_reader = PdfFileReader(pdf_file)
    num_pages = pdf_reader.getNumPages()
    pdf_writer = PdfFileWriter()
    for page in pdf_reader.pages:
        if compress:
            page.compressContentStreams()
        pdf_writer.addPage(page)
    # One slot per nesting level holding the most recently added bookmark of
    # that level; it is the parent for following entries one level deeper.
    # default=-1 keeps an empty bookmark list from raising in max().
    deepest = max((entry[0] for entry in bookmark), default=-1)
    last_cache = [None] * (deepest + 1)
    for tab, title, pagenum in bookmark:
        # Skip targets outside the document. Negative indices are rejected as
        # well so that a negative offset cannot be read as an index counted
        # from the end of the page list.
        if not 0 <= pagenum < num_pages:
            continue
        parent = last_cache[tab - 1] if tab > 0 else None
        indirect_id = pdf_writer.addBookmark(title, pagenum, parent=parent)
        last_cache[tab] = indirect_id
    pdf_writer.setPageMode("/UseOutlines")
    with open(pdf_file, "wb") as out:
        pdf_writer.write(out)
    print("已成功将书签写入到", pdf_file)
sg.change_look_and_feel("Python")
# Window layout: a tab group with one tab per tool (split, merge, bookmarks,
# watermark), followed by an output console and a row of global options.
# The element keys defined here are the names the event loop uses to read the
# entered values.
layout = [
    [sg.TabGroup([[
        # Split tab: source file, pages per part, output directory.
        sg.Tab('拆分', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="filename"),
             sg.FileBrowse('...', target='filename', file_types=(("PDF Files", "*.pdf"),), initial_folder=".")],
            [sg.Text('拆分页数:', font=("楷体", 12)),
             sg.In(size=(3, 1), key="n", default_text="5"),
             sg.Button('开始拆分'),
             sg.Button('打开输出目录'),
             ],
            [sg.Text('输出目录:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./output", key="save_dir"),
             sg.FolderBrowse('...', target='save_dir', initial_folder="."),
             ],
        ]),
        # Merge tab: source directory, bookmark option, output file.
        sg.Tab('合并', [
            [sg.Text('pdf文件目录:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="pdf_dir"),
             sg.FolderBrowse('...', target='pdf_dir', initial_folder=".")],
            [sg.Checkbox('添加文件名作为书签', key="add_bookmark", default=True),
             sg.Button('开始合并'),
             sg.Button('定位输出位置'),
             ],
            [sg.Text('输出位置:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./合并.pdf", key="out_name"),
             sg.FileSaveAs('...', target='out_name', file_types=(("PDF Files", "*.pdf"),), initial_folder="."),
             ],
        ]),
        # Bookmark tab: PDF file, bookmark text file, compression and offset.
        sg.Tab('书签', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)), sg.In(size=(32, 1), key="pdf_file"),
             sg.FileBrowse('...', target='pdf_file', file_types=(("PDF Files", "*.pdf"),), initial_folder=".")],
            [sg.Button('提取书签'),
             sg.Button('定位书签位置'),
             ],
            [sg.Text('书签位置:', font=("楷体", 12)),
             sg.In(size=(35, 1), default_text="./bookmark.txt", key="bookmark_name"),
             sg.FileSaveAs('...', target='bookmark_name', file_types=(("书签文件", "*.txt"),), initial_folder="."),
             ],
            [sg.Checkbox('压缩', key="compress", default=False),
             sg.Text('偏移:'), sg.In(size=(3, 1), default_text="0", key="diff"),
             sg.Button('写入书签'), ]
        ]),
        # Watermark tab: PDF file, grid/rotation/opacity, font and text.
        sg.Tab('水印', [
            [sg.Text('pdf文件地址:', font=("楷体", 12)),
             sg.In(size=(32, 1), key="pdf_file2"),
             sg.FileBrowse('...', target='pdf_file2', file_types=(("PDF Files", "*.pdf"),), initial_folder=".")],
            [
                sg.Text('行:'), sg.In(size=(2, 1), default_text="3", key="row"),
                sg.Text('列:'), sg.In(size=(2, 1), default_text="2", key="col"),
                sg.Text('旋转:'), sg.In(size=(2, 1), default_text="45", key="rotate"),
                sg.Text('透明度:'), sg.In(size=(3, 1), default_text="0.3", key="alpha"),
                sg.Checkbox('覆盖', key="overwrite", default=False),
            ],
            [
                sg.Text('字体:', font=("楷体", 12)),
                # Raw string: the backslashes of the Windows path are literal
                # characters, not escape sequences.
                sg.In(size=(26, 1), key="font_file", default_text=r"C:\Windows\Fonts\msyh.ttc"),
                sg.FileBrowse('...', target='font_file', initial_folder="."),
                sg.Text('大小:'), sg.In(size=(3, 1), default_text="35", key="font_szie"),
            ],
            [sg.Text('水印内容:', font=("楷体", 12)), sg.In(size=(24, 1), key="text"),
             sg.Button('添加水印'),
             ],
        ]),
    ]])],
    # Console area keyed "out".
    [sg.Output(size=(53, 7), key="out", text_color="#15d36a")],
    [
        sg.Checkbox('处理完毕后定位输出目录', key="open_folder", default=True),
        sg.Button('清空输出'),
    ],
    [sg.Text("@小小明:https://blog.csdn.net/as604049322"), ],
]
def resource_path(relative_path):
    """Resolve *relative_path* against the application's resource directory.

    The base directory is ``sys._MEIPASS`` when that attribute exists and the
    directory containing this source file otherwise.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    resource_root = getattr(sys, '_MEIPASS', script_dir)
    return os.path.join(resource_root, relative_path)
# Title string passed to the window; it carries the tool name and version.
title = 'PDF常用工具箱 v0.2'
# Create the main window from the layout defined above. The icon file is
# located through resource_path(). Note that this runs at import time, before
# the __main__ guard below.
window = sg.Window(title, layout, icon=resource_path("./pdf.ico"))
# Finalize the window before the event loop starts reading from it.
window.finalize()
def _open_in_explorer(path):
    """Launch Windows Explorer on *path*.

    The command is given as an argument list so that subprocess does the
    quoting; a path containing spaces or commas reaches explorer as a single
    argument instead of being spliced into a command string.
    """
    sub.Popen(["explorer", path], shell=False)


if __name__ == '__main__':
    # Event loop: read one GUI event at a time and dispatch on the caption of
    # the button that produced it.
    while True:
        event, values = window.read()
        if event is None:
            break  # the window was closed
        try:
            if event == "开始拆分":
                save_dir = os.path.abspath(values['save_dir'])
                os.makedirs(save_dir, exist_ok=True)
                split_pdf(values["filename"], save_dir, int(values["n"]))
                window["save_dir"].Update(save_dir)
                if values["open_folder"]:
                    _open_in_explorer(save_dir)
            elif event == "打开输出目录":
                save_dir = os.path.abspath(values['save_dir'])
                if os.path.exists(save_dir):
                    _open_in_explorer(save_dir)
                else:
                    sg.popup("输出目录不存在!")
                    # Reset the field to the default output directory.
                    window["save_dir"].Update(os.path.abspath("./output"))
            elif event == "开始合并":
                pdf_dir = os.path.abspath(values['pdf_dir'])
                out_name = os.path.abspath(values['out_name'])
                merge_pdf(pdf_dir, out_name, values["add_bookmark"])
                window["out_name"].Update(out_name)
                if values["open_folder"]:
                    _open_in_explorer(os.path.dirname(out_name))
            elif event == "定位输出位置":
                out_name = os.path.abspath(values['out_name'])
                _open_in_explorer(os.path.dirname(out_name))
            elif event == "提取书签":
                pdf_file = os.path.abspath(values['pdf_file'])
                bookmark_name = os.path.abspath(values['bookmark_name'])
                bookmark = get_pdf_Bookmark(pdf_file)
                window["bookmark_name"].Update(bookmark_name)
                write_bookmark2file(bookmark, filename=bookmark_name)
                if values["open_folder"]:
                    _open_in_explorer(os.path.dirname(bookmark_name))
            elif event == "写入书签":
                pdf_file = os.path.abspath(values['pdf_file'])
                bookmark_name = os.path.abspath(values['bookmark_name'])
                bookmark = read_bookmark_from_file(bookmark_name, int(values["diff"]))
                window["bookmark_name"].Update(bookmark_name)
                pdf_write_bookmark(bookmark, pdf_file, values["compress"])
                if values["open_folder"]:
                    _open_in_explorer(os.path.dirname(pdf_file))
            elif event == "定位书签位置":
                bookmark_name = os.path.abspath(values['bookmark_name'])
                _open_in_explorer(os.path.dirname(bookmark_name))
            # NOTE(review): the '添加水印' button has no live handler. The
            # branch below is commented out in the source, so clicking the
            # button does nothing. Confirm whether it was disabled on purpose
            # before re-enabling it.
            # elif event == "添加水印":
            #     pdf_file = os.path.abspath(values['pdf_file2'])
            #     result_file = pdf_add_water_mark(pdf_file, values["text"], values["overwrite"],
            #                                      int(values["col"]), int(values["row"]),
            #                                      values["font_file"], int(values["font_szie"]),
            #                                      int(values["rotate"]), float(values["alpha"]))
            #     print("水印保存到", result_file)
            #     if values["open_folder"]:
            #         sub.Popen(f"explorer {os.path.dirname(result_file)}", shell=False)
            elif event == "清空输出":
                window["out"].Update("")
        except Exception as exc:
            # Top-level GUI boundary: report the failure (bad number, missing
            # or unreadable file, ...) and keep the window alive instead of
            # letting one failed action terminate the application.
            print(f"操作失败: {exc!r}")
    window.close()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册