未验证 提交 9c0d3652 编写于 作者: D David Amos

Add article source code files

上级 1e0b8bc9
......@@ -7,5 +7,4 @@ There are two subfolders in this folder:
1. **`practice_files/`:** Contains the sample PDFs used in the chapter
2. **`source_code/`:** Contains source code from the chapter
TODO:
- [ ] Add source code files
The source code files are organized by section of the article, and the start of each subsection is indicated with comments.
# ---------------
# Open a PDF File
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader

# Location of the sample PDF. Adjust the folder names below if the practice
# files live somewhere else on your computer.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

# PdfFileReader is handed the path as a plain string.
pdf = PdfFileReader(str(pdf_path))

# Show the page count, the document-information object, and its title.
print(pdf.getNumPages())
print(pdf.documentInfo)
print(pdf.documentInfo.title)
# ---------------------------
# Extracting Text From a Page
# ---------------------------

# Fetch the page at index 0 and show its type and extracted text.
first_page = pdf.getPage(0)
print(type(first_page))
print(first_page.extractText())

# Print the extracted text of every page in the document, in order.
for page in pdf.pages:
    print(page.extractText())
# -----------------------
# Putting It All Together
# -----------------------
# Write the title, the page count, and the extracted text of every page of
# the sample PDF to a text file in the user's home directory.

from pathlib import Path

from PyPDF2 import PdfFileReader

# Change the path below to the correct path for your computer.
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"  # underscore, matching the folder used by the other sections
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
output_file_path = Path.home() / "Pride_and_Prejudice.txt"

# Explicit UTF-8 so the extracted text is written the same way on every
# platform instead of depending on the locale's default encoding.
with output_file_path.open(mode="w", encoding="utf-8") as output_file:
    title = pdf_reader.documentInfo.title
    num_pages = pdf_reader.getNumPages()
    # Header: title, page count, then a blank line before the page text.
    output_file.write(f"{title}\nNumber of pages: {num_pages}\n\n")
    for page in pdf_reader.pages:
        text = page.extractText()
        output_file.write(text)
# -----------------------------
# Using the PdfFileWriter Class
# -----------------------------

from pathlib import Path

from PyPDF2 import PdfFileWriter

pdf_writer = PdfFileWriter()

# Add one blank 72 x 72 page (PyPDF2 documents width/height as being in
# default user-space units) and show the type of the returned page object.
page = pdf_writer.addBlankPage(width=72, height=72)
print(type(page))

# PDF data is binary, so the output file is opened in "wb" mode.
with Path("blank.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# -----------------------------------
# Extracting a Single Page From a PDF
# -----------------------------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

# Change the path to work on your computer if necessary
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

# Read the page at index 0 from the source document ...
input_pdf = PdfFileReader(str(pdf_path))
first_page = input_pdf.getPage(0)

# ... and add it to a new, otherwise empty writer.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)

# The output path is relative, so the file lands in the current working
# directory.
with Path("first_page.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# ------------------------------------
# Extracting Multiple Pages From a PDF
# ------------------------------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)
input_pdf = PdfFileReader(str(pdf_path))

# First approach: fetch the pages at indices 1, 2 and 3 one at a time and
# print how many pages the writer now holds.
pdf_writer = PdfFileWriter()
for n in range(1, 4):
    page = input_pdf.getPage(n)
    pdf_writer.addPage(page)
print(pdf_writer.getNumPages())

# Second approach: start over with a fresh writer and take the same index
# range by slicing .pages.
pdf_writer = PdfFileWriter()
for page in input_pdf.pages[1:4]:
    pdf_writer.addPage(page)

# Only the second writer is saved; the first one was replaced above.
with Path("chapter1_slice.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
\ No newline at end of file
# -----------------------------
# Using the PdfFileMerger Class
# -----------------------------

from pathlib import Path

from PyPDF2 import PdfFileMerger

pdf_merger = PdfFileMerger()

# ---------------------------------
# Concatenating PDFs With .append()
# ---------------------------------

reports_dir = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "expense_reports"
)

# List the PDF files in whatever order glob() happens to yield them.
for path in reports_dir.glob("*.pdf"):
    print(path.name)

# Sort the paths so the reports are merged in a predictable order, then list
# them again to show the sorted order.
expense_reports = sorted(reports_dir.glob("*.pdf"))
for path in expense_reports:
    print(path.name)

# Append each report to the merger, passing the path as a string.
for path in expense_reports:
    pdf_merger.append(str(path))

with Path("expense_reports.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)
# --------------------------
# Merging PDFs With .merge()
# --------------------------

from pathlib import Path

from PyPDF2 import PdfFileMerger

report_dir = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "quarterly_report"
)
report_path = report_dir / "report.pdf"
toc_path = report_dir / "toc.pdf"

# Start with the full report, then insert the table of contents at
# position 1 (the first argument of .merge() is the insertion position).
pdf_merger = PdfFileMerger()
pdf_merger.append(str(report_path))
pdf_merger.merge(1, str(toc_path))

with Path("full_report.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)
\ No newline at end of file
# --------------
# Rotating Pages
# --------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "ugly.pdf"
)

# First pass: rotate by position. Every even-indexed page is turned 90
# degrees clockwise; all pages are copied to the writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
for n in range(pdf_reader.getNumPages()):
    page = pdf_reader.getPage(n)
    if n % 2 == 0:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Re-open the file to get fresh page objects; the loop above called
# rotateClockwise() on pages obtained from the previous reader.
pdf_reader = PdfFileReader(str(pdf_path))
print(pdf_reader.getPage(0))

# Inspect the "/Rotate" entry of the first two pages.
page = pdf_reader.getPage(0)
print(page["/Rotate"])
page = pdf_reader.getPage(1)
print(page["/Rotate"])

# Print "/Rotate" for page 0 before and after rotating it.
page = pdf_reader.getPage(0)
print(page["/Rotate"])
page.rotateClockwise(90)
print(page["/Rotate"])

# Second pass: reload the file once more and decide per page, based on its
# own "/Rotate" value, whether it needs to be turned.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
for page in pdf_reader.pages:
    if page["/Rotate"] == -90:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated2.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
# --------------
# Cropping Pages
# --------------

import copy
from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "half_and_half.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
first_page = pdf_reader.getPage(0)

# Inspect the page's media box: the box itself, its four corners, and the
# two individual coordinates of the upper-right corner.
print(first_page.mediaBox)
print(first_page.mediaBox.lowerLeft)
print(first_page.mediaBox.lowerRight)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)
print(first_page.mediaBox.upperRight[0])
print(first_page.mediaBox.upperRight[1])

# Move the upper-left corner to (0, 480), then print both upper corners to
# show the effect of the assignment.
first_page.mediaBox.upperLeft = (0, 480)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)
with Path("cropped_page.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Second example: reload the file and split the first page into a left and
# a right half.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
first_page = pdf_reader.getPage(0)

# Each half is a deep copy, so changing one copy's media box does not touch
# the other copy or the original page object.
left_side = copy.deepcopy(first_page)
current_coords = left_side.mediaBox.upperRight
# Halve the x coordinate of the upper-right corner; keep y unchanged.
new_coords = (current_coords[0] / 2, current_coords[1])
left_side.mediaBox.upperRight = new_coords

# The right half uses the same point as its upper-left corner.
right_side = copy.deepcopy(first_page)
right_side.mediaBox.upperLeft = new_coords

pdf_writer.addPage(left_side)
pdf_writer.addPage(right_side)
with Path("cropped_pages.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
\ No newline at end of file
# ---------------
# Encrypting PDFs
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "newsletter.pdf"
)

# Copy every page of the newsletter into a new writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
pdf_writer.appendPagesFromReader(pdf_reader)

# Set a user password before writing, so the saved file is encrypted.
pdf_writer.encrypt(user_pwd="SuperSecret")

output_path = Path.home() / "newsletter_protected.pdf"
with output_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Demonstrates passing separate user and owner passwords.
# NOTE: nothing is written after this call in this section, so the file
# saved above still carries only the settings from the first encrypt() call.
user_pwd = "SuperSecret"
owner_pwd = "ReallySuperSecret"
pdf_writer.encrypt(user_pwd=user_pwd, owner_pwd=owner_pwd)
# ---------------
# Decrypting PDFs
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

# The password-protected file saved in the user's home directory.
pdf_path = Path.home().joinpath("newsletter_protected.pdf")
pdf_reader = PdfFileReader(str(pdf_path))

# Accessing a page before decrypting fails. When this file is run top to
# bottom as a script, the exception raised here stops execution unless it is
# handled.
print(pdf_reader.getPage(0))  # Raises PdfReadError

# Print the value returned by decrypt(), then access the page again.
print(pdf_reader.decrypt(password="SuperSecret"))
print(pdf_reader.getPage(0))
\ No newline at end of file
# ----------------------
# Using the Canvas Class
# ----------------------
from reportlab.pdfgen.canvas import Canvas
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册