"""Smart PDF Page Extractor: a Tkinter GUI for selecting pages of a PDF and saving them as a new PDF."""

import fitz  # PyMuPDF

import tkinter as tk

from tkinter import filedialog, messagebox, ttk

from PIL import Image, ImageTk



class PDFExtractorApp:
    """Tkinter GUI that shows PDF page thumbnails and saves chosen pages.

    Attributes:
        root: The Tk root window the application is built in.
        pdf_doc: The currently open PyMuPDF document, or ``None``.
        page_images: ``PhotoImage`` objects for the displayed thumbnails.
        selected_pages: Zero-based indices of the pages currently selected.
    """

    # Zoom factor applied to each page when rendering its thumbnail.
    THUMBNAIL_SCALE = 0.3

    def __init__(self, root):
        """Set up the window, initialise empty state and build the widgets.

        Args:
            root: The ``tk.Tk`` (or ``Toplevel``) window to populate.
        """
        self.root = root
        self.root.title("Smart PDF Page Extractor")
        self.root.geometry("900x600")

        self.pdf_doc = None
        self.page_images = []
        self.selected_pages = []

        self.create_ui()

    # -------------------------------------------------------------
    # UI SETUP
    # -------------------------------------------------------------
    def create_ui(self):
        """Create the toolbar buttons and the scrollable thumbnail area."""
        top_frame = tk.Frame(self.root)
        top_frame.pack(fill=tk.X, pady=10)

        tk.Button(top_frame, text="Open PDF", command=self.open_pdf).pack(side=tk.LEFT, padx=10)
        tk.Button(top_frame, text="Extract Selected", command=self.extract_pages).pack(side=tk.LEFT, padx=10)
        tk.Button(top_frame, text="Save New PDF", command=self.save_new_pdf).pack(side=tk.LEFT, padx=10)

        self.canvas = tk.Canvas(self.root, bg="white")
        self.scroll_y = tk.Scrollbar(self.root, orient="vertical", command=self.canvas.yview)
        self.canvas.configure(yscrollcommand=self.scroll_y.set)

        # The thumbnails live in a frame embedded in the canvas so that the
        # canvas can scroll them.
        self.frame = tk.Frame(self.canvas)
        self.canvas.create_window((0, 0), window=self.frame, anchor="nw")

        self.canvas.pack(side=tk.LEFT, fill="both", expand=True)
        self.scroll_y.pack(side=tk.RIGHT, fill="y")

        # Keep the scrollable region in sync with the frame's size.
        self.frame.bind(
            "<Configure>",
            lambda e: self.canvas.configure(scrollregion=self.canvas.bbox("all")),
        )

    # -------------------------------------------------------------
    # OPEN PDF
    # -------------------------------------------------------------
    def open_pdf(self):
        """Ask the user for a PDF, open it and display its page thumbnails.

        Does nothing if the dialog is cancelled. Any failure while opening or
        rendering is reported in an error dialog rather than raised.
        """
        filepath = filedialog.askopenfilename(
            filetypes=[("PDF Files", "*.pdf")],
            title="Choose a PDF",
        )
        if not filepath:
            return

        try:
            new_doc = fitz.open(filepath)
        except Exception as e:
            # The previously open document (if any) is left untouched.
            messagebox.showerror("Error", f"Failed to open PDF:\n{e}")
            return

        # Swap in the new document and release the old one so its file
        # handle is not leaked when the user opens several PDFs in a row.
        previous_doc, self.pdf_doc = self.pdf_doc, new_doc
        if previous_doc is not None:
            previous_doc.close()

        try:
            self.load_pages()
        except Exception as e:
            messagebox.showerror("Error", f"Failed to open PDF:\n{e}")

    # -------------------------------------------------------------
    # LOAD AND DISPLAY PAGE THUMBNAILS
    # -------------------------------------------------------------
    def load_pages(self):
        """Render a thumbnail, caption and "Select" button for every page.

        Clears any thumbnails already shown and resets the current selection
        before rendering the pages of ``self.pdf_doc``.
        """
        for widget in self.frame.winfo_children():
            widget.destroy()

        self.selected_pages = []
        self.page_images = []

        scale = fitz.Matrix(self.THUMBNAIL_SCALE, self.THUMBNAIL_SCALE)

        for page_num in range(len(self.pdf_doc)):
            page = self.pdf_doc.load_page(page_num)
            pix = page.get_pixmap(matrix=scale)

            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            img_tk = ImageTk.PhotoImage(image)

            # Hold a reference for as long as the thumbnail is displayed so
            # the image is not garbage-collected.
            self.page_images.append(img_tk)

            page_frame = tk.Frame(self.frame, bd=2, relief="groove")
            page_frame.pack(padx=10, pady=10, anchor="w")

            tk.Label(page_frame, image=img_tk).pack()
            tk.Label(page_frame, text=f"Page {page_num + 1}", font=("Arial", 12, "bold")).pack()

            # Bind page_num as a default argument so each button toggles its
            # own page rather than the loop's final value.
            tk.Button(
                page_frame,
                text="Select",
                command=lambda n=page_num: self.toggle_page(n),
            ).pack(pady=5)

    # -------------------------------------------------------------
    # SELECT / UNSELECT PAGE
    # -------------------------------------------------------------
    def toggle_page(self, page_num):
        """Add or remove a page from the selection and tell the user.

        Args:
            page_num: Zero-based index of the page to toggle.
        """
        if page_num in self.selected_pages:
            self.selected_pages.remove(page_num)
            messagebox.showinfo("Page Unselected", f"Page {page_num + 1} removed.")
        else:
            self.selected_pages.append(page_num)
            messagebox.showinfo("Page Selected", f"Page {page_num + 1} selected.")

    def _selection_in_page_order(self):
        """Return the selected page indices as a new list in ascending order."""
        return sorted(self.selected_pages)

    # -------------------------------------------------------------
    # EXTRACT SELECTED PAGES
    # -------------------------------------------------------------
    def extract_pages(self):
        """Sort the selection into page order and show it to the user.

        Shows an error dialog instead if no page is selected.
        """
        if not self.selected_pages:
            messagebox.showerror("Error", "No pages selected!")
            return

        self.selected_pages = self._selection_in_page_order()

        messagebox.showinfo("Selected Pages", f"Extracting pages: {[p+1 for p in self.selected_pages]}")

    # -------------------------------------------------------------
    # SAVE NEW PDF WITH SELECTED PAGES
    # -------------------------------------------------------------
    def save_new_pdf(self):
        """Write the selected pages, in page order, to a PDF chosen by the user.

        Shows an error dialog if nothing is selected or if writing fails, and
        does nothing if the save dialog is cancelled.
        """
        if not self.selected_pages:
            messagebox.showerror("Error", "Select pages before saving!")
            return

        output_path = filedialog.asksaveasfilename(
            defaultextension=".pdf",
            filetypes=[("PDF Files", "*.pdf")],
            title="Save Extracted PDF",
        )
        if not output_path:
            return

        try:
            new_pdf = fitz.open()
            try:
                # Always write in page order so the result does not depend on
                # whether "Extract Selected" was pressed beforehand.
                for page_num in self._selection_in_page_order():
                    new_pdf.insert_pdf(self.pdf_doc, from_page=page_num, to_page=page_num)
                new_pdf.save(output_path)
            finally:
                # Release the temporary document even when saving fails.
                new_pdf.close()
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save PDF:\n{e}")
        else:
            messagebox.showinfo("Success", "New PDF saved successfully!")



# -------------------------------------------------------------

# RUN APP

# -------------------------------------------------------------

# Only launch the GUI when this file is run as a script, so that importing
# the module does not open a window and block in the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = PDFExtractorApp(root)
    root.mainloop()


# No comments: