未验证 提交 4110820d 编写于 作者: D David Amos 提交者: GitHub

Merge pull request #110 from realpython/creating-modifying-pdf

creating-modifying-pdfs
# Creating and Modifying PDFs With Python
This folder contains resources and materials for Real Python's [Creating and Modifying PDFs With Python](https://realpython.com/creating-modifying-pdf/) tutorial.
There are two subfolders in this folder:
1. **`practice_files/`:** Contains the sample PDFs used in the tutorial
2. **`source_code/`:** Contains the source code from the tutorial
The source code files are organized by section of the article, and the start of each subsection is indicated with comments.
The content of the companion tutorial was adapted from the "Creating and Modifying PDF Files" chapter of the book [*Python Basics: A Practical Introduction to Python 3*](https://realpython.com/products/python-basics-book/). If you enjoy this tutorial, check out the full book!
# ---------------
# Open a PDF File
# ---------------
from pathlib import Path

from PyPDF2 import PdfFileReader

# Location of the sample PDF. You might need to change this to match the
# path on your computer.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

# The reader is handed the path converted to a plain string.
pdf = PdfFileReader(str(pdf_path))

# Show the page count, the document-info object, and that object's title.
print(pdf.getNumPages())
print(pdf.documentInfo)
print(pdf.documentInfo.title)
# ---------------------------
# Extracting Text From a Page
# ---------------------------
# `pdf` is the PdfFileReader created in the "Open a PDF File" section.

# Fetch the page at index 0, then show its type and its extracted text.
first_page = pdf.getPage(0)
print(type(first_page))
print(first_page.extractText())

# Print the extracted text of every page, in document order.
for page in pdf.pages:
    print(page.extractText())
# -----------------------
# Putting It All Together
# -----------------------
# Reads Pride_and_Prejudice.pdf and writes its title, page count, and the
# extracted text of every page to Pride_and_Prejudice.txt in the home folder.
from pathlib import Path  # noqa

from PyPDF2 import PdfFileReader  # noqa

# Change the path below to the correct path for your computer.
# The folder is "practice_files" (underscore), as in every other section.
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
output_file_path = Path.home() / "Pride_and_Prejudice.txt"

# An explicit encoding keeps the write from depending on the platform's
# default codec, which may not be able to represent all extracted text.
with output_file_path.open(mode="w", encoding="utf-8") as output_file:
    title = pdf_reader.documentInfo.title
    num_pages = pdf_reader.getNumPages()

    # Header: real newlines ("\n"), not the literal two-character "\\n".
    output_file.write(f"{title}\nNumber of pages: {num_pages}\n\n")

    # Append the text of each page while the file is still open.
    for page in pdf_reader.pages:
        text = page.extractText()
        output_file.write(text)
# -----------------------------
# Using the PdfFileWriter Class
# -----------------------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileWriter

# Start from an empty writer and add one blank page of width 72, height 72.
pdf_writer = PdfFileWriter()
page = pdf_writer.addBlankPage(width=72, height=72)
print(type(page))

# The output file is opened in binary mode ("wb") for the writer.
with Path("blank.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# -----------------------------------
# Extracting a Single Page From a PDF
# -----------------------------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

# Change the path to work on your computer if necessary
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

# Take the page at index 0 from the input document...
input_pdf = PdfFileReader(str(pdf_path))
first_page = input_pdf.getPage(0)

# ...and make it the only page of a new document.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)

with Path("first_page.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# ------------------------------------
# Extracting Multiple Pages From a PDF
# ------------------------------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# First approach: fetch the pages at indexes 1, 2, and 3 one at a time.
pdf_writer = PdfFileWriter()
for n in range(1, 4):
    page = input_pdf.getPage(n)
    pdf_writer.addPage(page)

print(pdf_writer.getNumPages())

# Second approach: start over with a fresh writer and slice .pages to get
# the same index range.
pdf_writer = PdfFileWriter()
for page in input_pdf.pages[1:4]:
    pdf_writer.addPage(page)

with Path("chapter1_slice.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# -----------------------------
# Using the PdfFileMerger Class
# -----------------------------
from PyPDF2 import PdfFileMerger

pdf_merger = PdfFileMerger()

# ---------------------------------
# Concatenating PDFs With .append()
# ---------------------------------
from pathlib import Path  # noqa

reports_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "expense_reports",
)

# List the PDF file names in the order glob() yields them.
for path in reports_dir.glob("*.pdf"):
    print(path.name)

# Collect the same paths into a sorted list and list the names again.
expense_reports = sorted(reports_dir.glob("*.pdf"))
for path in expense_reports:
    print(path.name)

# Append every report, in sorted order, to the merger created above.
for path in expense_reports:
    pdf_merger.append(str(path))

with Path("expense_reports.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)
# --------------------------
# Merging PDFs With .merge()
# --------------------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileMerger  # noqa

report_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "quarterly_report",
)
report_path = report_dir.joinpath("report.pdf")
toc_path = report_dir.joinpath("toc.pdf")

# Load the whole report first, then merge the table of contents in,
# passing 1 as the position argument to .merge().
pdf_merger = PdfFileMerger()
pdf_merger.append(str(report_path))
pdf_merger.merge(1, str(toc_path))

with Path("full_report.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)
# --------------
# Rotating Pages
# --------------
from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs", "practice_files", "ugly.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

# Pages at even indexes (0, 2, ...) get rotateClockwise(90); every page,
# rotated or not, is added to the writer in its original order.
for n in range(pdf_reader.getNumPages()):
    page = pdf_reader.getPage(n)
    if not n % 2:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# Re-open ugly.pdf with a fresh reader and look at how rotation is stored
# on the page objects.
pdf_reader = PdfFileReader(str(pdf_path))
print(pdf_reader.getPage(0))

# Show the "/Rotate" entry of the pages at indexes 0 and 1.
page = pdf_reader.getPage(0)
print(page["/Rotate"])

page = pdf_reader.getPage(1)
print(page["/Rotate"])

# Show the "/Rotate" entry of page 0 before and after rotateClockwise(90).
page = pdf_reader.getPage(0)
print(page["/Rotate"])
page.rotateClockwise(90)
print(page["/Rotate"])
# Rotate pages based on their own "/Rotate" entry instead of their index:
# only pages whose "/Rotate" value is -90 are turned clockwise by 90.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

for page in pdf_reader.pages:
    # Check for the key before indexing: a page without a "/Rotate" entry
    # would otherwise raise KeyError. Such pages are copied unchanged.
    if "/Rotate" in page and page["/Rotate"] == -90:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated2.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# --------------
# Cropping Pages
# --------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "half_and_half.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))
first_page = pdf_reader.getPage(0)

# Show the page's mediaBox and each of its four corners.
print(first_page.mediaBox)
print(first_page.mediaBox.lowerLeft)
print(first_page.mediaBox.lowerRight)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

# A corner can be indexed: [0] and [1] are its two coordinates.
print(first_page.mediaBox.upperRight[0])
print(first_page.mediaBox.upperRight[1])

# Assign a new upper-left corner, then print both upper corners again to
# see the effect of the assignment.
first_page.mediaBox.upperLeft = (0, 480)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

# Save the modified page as a one-page document.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)

with Path("cropped_page.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# Split the first page of half_and_half.pdf into a left and a right page.
import copy  # noqa

pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
first_page = pdf_reader.getPage(0)

# Work on two independent deep copies so that changing one mediaBox
# does not touch the other copy or the page they were copied from.
left_side = copy.deepcopy(first_page)
right_side = copy.deepcopy(first_page)

# The shared boundary: half of the upper-right corner's first coordinate,
# with the second coordinate unchanged.
current_coords = left_side.mediaBox.upperRight
new_coords = (current_coords[0] / 2, current_coords[1])

# The boundary becomes the left copy's upper-right corner and the right
# copy's upper-left corner.
left_side.mediaBox.upperRight = new_coords
right_side.mediaBox.upperLeft = new_coords

pdf_writer.addPage(left_side)
pdf_writer.addPage(right_side)

with Path("cropped_pages.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# ---------------
# Encrypting PDFs
# ---------------
from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "newsletter.pdf",
)

# Copy every page of the newsletter into a writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
pdf_writer.appendPagesFromReader(pdf_reader)

# Encrypt with a user password only, then save to the home folder.
pdf_writer.encrypt(user_pwd="SuperSecret")

output_path = Path.home().joinpath("newsletter_protected.pdf")
with output_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Variant: pass both a user and an owner password to .encrypt().
# Nothing is written to disk after this call.
user_pwd = "SuperSecret"
owner_pwd = "ReallySuperSecret"
pdf_writer.encrypt(user_pwd=user_pwd, owner_pwd=owner_pwd)
# ---------------
# Decrypting PDFs
# ---------------
from pathlib import Path  # noqa

from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

# Open the password-protected file saved in the home folder.
pdf_path = Path.home().joinpath("newsletter_protected.pdf")
pdf_reader = PdfFileReader(str(pdf_path))

# Reading a page before decrypting fails.
# NOTE(review): this call is expected to raise, so a straight top-to-bottom
# run stops here; the lines below are meant to be tried interactively.
print(pdf_reader.getPage(0))  # Raises PdfReadError

# Decrypt with the password, print what .decrypt() returns, then read the
# page again.
print(pdf_reader.decrypt(password="SuperSecret"))
print(pdf_reader.getPage(0))
# ----------------------
# Using the Canvas Class
# ----------------------
from reportlab.pdfgen.canvas import Canvas

# Create a canvas for hello.pdf, draw one string at coordinates (72, 72),
# and save the file.
canvas = Canvas("hello.pdf")
canvas.drawString(72, 72, "Hello, World")
canvas.save()
# ---------------------
# Setting the Page Size
# ---------------------
from reportlab.lib.pagesizes import LETTER  # noqa
from reportlab.lib.units import inch, cm  # noqa

# Show the values of the two unit constants.
print(cm)
print(inch)

# Give the page size explicitly as a (width, height) tuple built from inch...
canvas = Canvas("hello.pdf", pagesize=(8.5 * inch, 11 * inch))

# ...or pass the predefined LETTER constant, then show its value.
canvas = Canvas("hello.pdf", pagesize=LETTER)
print(LETTER)
# -----------------------
# Setting Font Properties
# -----------------------
# Canvas, LETTER, and inch are imported in the preceding sections.
canvas = Canvas("font-example.pdf", pagesize=LETTER)

# Select the "Times-Roman" font at size 18 before drawing the text.
canvas.setFont("Times-Roman", 18)
canvas.drawString(1 * inch, 10 * inch, "Times New Roman (18 pt)")

canvas.save()
# The code below creates a PDF with blue text
from reportlab.lib.colors import blue  # noqa
from reportlab.lib.pagesizes import LETTER  # noqa
from reportlab.lib.units import inch  # noqa
from reportlab.pdfgen.canvas import Canvas  # noqa

canvas = Canvas("font-colors.pdf", pagesize=LETTER)

# Use the "Times-Roman" font at size 12.
canvas.setFont("Times-Roman", 12)

# Switch the fill color to blue, then draw the text at
# (1 * inch, 10 * inch).
canvas.setFillColor(blue)
canvas.drawString(1 * inch, 10 * inch, "Blue text")

# Write the finished PDF to disk.
canvas.save()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册