提交 0900614a 编写于 作者: Q qq_33866770

Wed Mar 13 18:15:00 CST 2024 inscode

上级 0520122c
import streamlit as st
from streamlit_option_menu import option_menu
import inscode
from PyPDF2 import PdfFileReader
# Configure the browser tab title and use the wide page layout.
st.set_page_config(page_title="app name", layout="wide")
st.title("PDF文件文字提取")

# Labels of the two sidebar menu entries; main() compares `menu` against them.
menu1 = "菜单1"
menu2 = "菜单2"

# Upload widget restricted to PDF files; holds None until a file is chosen.
uploaded_file = st.file_uploader("请上传PDF文件", type=["pdf"])

# Sidebar navigation; `menu` receives the label of the selected entry.
with st.sidebar:
    menu = option_menu(
        "菜单",
        [menu1, menu2],
        icons=["house", "list-task"],
        menu_icon="cast",
        default_index=0,
    )

# Persist the uploaded bytes to a local file so they can be re-read from disk.
if uploaded_file is not None:
    with open("temp.pdf", "wb") as out:
        out.write(uploaded_file.getbuffer())
def main():
    """Process ``temp.pdf`` according to the selected menu and render the output.

    When ``menu`` equals ``menu1`` the text of every page is extracted with
    PyPDF2; when it equals ``menu2`` the file is passed to ``inscode.ocr``.
    Both result sections are always written to the page; the one that was
    not produced for the current menu choice is shown as an empty string.

    Returns:
        str: The text extracted by PyPDF2, or ``""`` when the extraction
        branch was not selected.
    """
    # Defaults keep the st.write() calls below from hitting an unbound name
    # when only one of the two branches has run.
    text = ""
    result = ""

    # The context manager closes the file handle once reading is done.
    # NOTE(review): PdfFileReader / getNumPages / getPage / extractText are
    # the legacy PyPDF2 names; PyPDF2 >= 3.0 replaced them with PdfReader,
    # len(reader.pages) and extract_text() — confirm the pinned version.
    with open("temp.pdf", "rb") as pdf_stream:
        pdf_file = PdfFileReader(pdf_stream)
        num_of_pages = pdf_file.getNumPages()

        if menu == menu1:
            st.subheader(f"{menu1}")
            # Convert the PDF to text page by page and join the pieces once.
            text = "".join(
                pdf_file.getPage(page).extractText()
                for page in range(num_of_pages)
            )

    if menu == menu2:
        st.subheader(f"{menu2}")
        # Run OCR on the saved file.
        # NOTE(review): the meaning of the "pdf_file" argument is defined by
        # the inscode package and is not visible here — verify against it.
        result = inscode.ocr("pdf_file", "temp.pdf")

    st.write("PDF文件中的文本信息为:")
    st.write(text)
    st.write("OCR识别后的文本信息为:")
    st.write(result)
    return text


if __name__ == '__main__':
    # Only process a file uploaded in this run; without this check main()
    # would fail on a missing temp.pdf or read a stale one from an earlier run.
    if uploaded_file is not None:
        # main() returns the extracted text so it is available in this scope.
        text = main()
        # Save the extracted text to a local file.
        with open("text.txt", "w", encoding="utf-8") as f:
            f.write(text)
        st.write("已将提取后的文本信息保存到 text.txt 文件中。")
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册