Jul 1, 2024
# Read a PDF with PyPDF2: count its pages and extract text.
#
# Install the library first (shell command, run in a terminal -- not Python):
#     pip install PyPDF2
# NOTE(review): the install command was fused onto the import line and read
# "pip install PyPDF"; the module imported below is PyPDF2, so the
# distribution name was adjusted to match -- confirm against the article.
from PyPDF2 import PdfReader

reader = PdfReader('file.pdf')
length = len(reader.pages)  # number of pages in the document

# Text of the first page only.
page = reader.pages[0]
text = page.extract_text()

# Text of every page in turn. `text` is rebound on each iteration, so after
# the loop it holds the text of the last page and `page` is the last page.
for page in reader.pages:
    text = page.extract_text()
# Images can be extracted by iterating over page.images and saving them.
# Write every image exposed by `page.images` to disk.
# `page` is whatever page object was bound by the preceding statements.
for image in page.images:
    # Each file is named after the image's own `name` attribute and is
    # written in binary mode because `image.data` is written verbatim.
    with open(image.name, 'wb') as f:
        f.write(image.data)
# Extract tabular data from each page of a PDF with pdfplumber.
#
# Install the library first (shell command, run in a terminal -- not Python):
#     pip install pdfplumber
import pdfplumber

with pdfplumber.open('file.pdf') as f:
    for page in f.pages:
        # NOTE(review): the variable is named `tables` but the call is the
        # singular `extract_table()`; pdfplumber also offers
        # `extract_tables()` -- confirm which one was intended.
        # `tables` is rebound on every iteration, so only the last page's
        # result remains after the loop.
        tables = page.extract_table()
# Inspect a PDF with PyMuPDF (imported as `fitz`): page count, metadata,
# text, a rendered image of a page, and the page's links.
#
# Install the library first (shell command, run in a terminal -- not Python):
#     pip install pymupdf
import fitz

doc = fitz.open('file.pdf')
page_count = doc.page_count
metadata = doc.metadata

# Work with the first page (index 0).
page = doc.load_page(0)
text = page.get_text()

# Render the page to a pixmap and save it, using the page number in the
# output file name.
pix = page.get_pixmap()
pix.save(f'page_{page.number}.png')

links = page.get_links()