Allgemein,  general,  Programmieren,  programming

Split a PDF into single pages

import os
import sys

import pdfplumber
from PyPDF2 import PdfReader, PdfWriter

input_pdf = sys.argv[1]
output_dir = sys.argv[1]

os.makedirs(output_dir, exist_ok=True)

reader = PdfReader(input_pdf)

with pdfplumber.open(input_pdf) as pdf:
    for i, page in enumerate(pdf.pages):
        w, h = page.width, page.height
        bbox = (0, 0, w * 0.45, h * 0.15)   # (x0, y0, x1, y1) -> top-left area
        cropped_text = page.within_bbox(bbox).extract_text() or ""
        if cropped_text.strip():
            first_line = cropped_text.strip().splitlines()[0].lower()
            safe_title = ''.join(c if c.isalnum() or c in (' ', '-', '_') else '' for c in first_line).replace(' ', '-')
        else:
            safe_title = f"Page_{i}"

        filename = f"{safe_title or f'Page_{i+1}'}.pdf"

        writer = PdfWriter()
        writer.add_page(reader.pages[i])
        with open(os.path.join(output_dir, filename), "wb") as f:
            writer.write(f)

print(f"Done splitting the PDF. The single pages are saved in '{output_dir}'")
Split a PDF into single pages

Leave a Reply

Your email address will not be published. Required fields are marked *