import fitz # PyMuPDF
import nltk
import re
from transformers import pipeline
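# Dependencies (a minimal sketch, assuming a standard PyTorch backend for
# the transformers pipeline — the original post does not list them):
#   pip install pymupdf nltk transformers torch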
# Load HuggingFace summarizer
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
# ----------------------------------------
# 1. Extract text from PDF
# ----------------------------------------
def extract_pdf_text(pdf_path):
    """Read every page of the PDF and return its text as one string."""
    full_text = ""
    with fitz.open(pdf_path) as doc:  # context manager closes the file for us
        for page in doc:
            full_text += page.get_text()
    return full_text
# ----------------------------------------
# 2. Split text into chapters
# Uses patterns like:
# - Chapter 1
# - CHAPTER I
# - CHAPTER ONE
# ----------------------------------------
def split_into_chapters(text):
    chapter_pattern = r"(chapter\s+\d+|chapter\s+[ivxlcdm]+|chapter\s+\w+)"
    # Because the pattern is wrapped in a capture group, re.split keeps the
    # matched headings: [preamble, title1, body1, title2, body2, ...]
    found = re.split(chapter_pattern, text, flags=re.IGNORECASE)
    chapters = []
    for i in range(1, len(found), 2):
        title = found[i].strip()
        content = found[i + 1].strip()
        chapters.append((title, content))
    # If no chapters are detected → return full book as one chapter
    if not chapters:
        return [("Full Book", text)]
    return chapters
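# A quick sanity check of the splitter (hypothetical sample text, not from
# the original post):
#
#   sample = "Preface...\nChapter 1\nIt begins.\nChapter 2\nIt ends."
#   split_into_chapters(sample)
#   → [("Chapter 1", "It begins."), ("Chapter 2", "It ends.")]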
# ----------------------------------------
# 3. Summarize long text in safe chunks
# ----------------------------------------
def summarize_long_text(text, max_chunk=1500):
    # sent_tokenize needs the punkt models; newer NLTK releases look up a
    # separate "punkt_tab" resource, so fetch both quietly.
    nltk.download("punkt", quiet=True)
    nltk.download("punkt_tab", quiet=True)
    sentences = nltk.sent_tokenize(text)
    # Greedily pack whole sentences into chunks of at most max_chunk
    # characters so each piece stays within the model's input limit.
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    # Summarize each chunk
    summaries = []
    for chunk in chunks:
        summary = summarizer(
            chunk, max_length=130, min_length=30, do_sample=False
        )[0]["summary_text"]
        summaries.append(summary)
    # Join all chunk summaries into one chapter summary
    return "\n".join(summaries)
# ----------------------------------------
# 4. Generate key points
# ----------------------------------------
def generate_key_points(summary):
    sentences = nltk.sent_tokenize(summary)
    key_points = sentences[:5]  # keep the first five sentences as key ideas
    return ["• " + s for s in key_points]
# ----------------------------------------
# 5. End-to-End Pipeline
# ----------------------------------------
def summarize_book(pdf_path):
    print("\n📄 Extracting PDF text...")
    text = extract_pdf_text(pdf_path)
    print("✂ Splitting into chapters...")
    chapters = split_into_chapters(text)
    results = []
    for idx, (title, content) in enumerate(chapters, start=1):
        print(f"\n📝 [{idx}/{len(chapters)}] Summarizing {title}...")
        summary = summarize_long_text(content)
        key_points = generate_key_points(summary)
        results.append({
            "chapter_title": title,
            "summary": summary,
            "key_points": key_points,
        })
    return results
# ----------------------------------------
# 6. Print Results Nicely
# ----------------------------------------
def display_results(results):
    print("\n============================")
    print("📚 BOOK SUMMARY REPORT")
    print("============================\n")
    for item in results:
        print(f"\n===== {item['chapter_title']} =====\n")
        print("SUMMARY:\n")
        print(item["summary"])
        print("\nKEY POINTS:")
        for p in item["key_points"]:
            print(p)
        print("\n---------------------------")
# ----------------------------------------
# RUN
# ----------------------------------------
if __name__ == "__main__":
pdf_path = input("Enter PDF file path: ").strip()
results = summarize_book(pdf_path)
display_results(results)
print("\n✔ Done! All chapters processed.")