# Smart Resume Gap Detector
#
# Setup:
#   pip install spacy pandas dateparser
#   python -m spacy download en_core_web_sm


import re

import spacy

import pandas as pd

import dateparser

from datetime import datetime

from pathlib import Path


# Optional PDF reader (only if you want to support .pdf)

try:

    import fitz  # PyMuPDF

except ImportError:

    fitz = None


# Load the small English spaCy pipeline once at import time; detect_jobs()
# runs every resume text through this shared instance.
nlp = spacy.load("en_core_web_sm")


# ----------------------------

# Skill upgrade recommendations

# ----------------------------

# Maps a role keyword to the skills recommended for that role.
# suggest_skills() checks the keywords in insertion order as substrings of
# the lower-cased job title, so the order of the entries matters.
SKILL_UPGRADE_MAP = {
    "developer": [
        "AI/ML fundamentals",
        "Cloud platforms (AWS, Azure, GCP)",
        "DevOps basics",
    ],
    "data": [
        "Data visualization (Power BI, Tableau)",
        "SQL optimization",
        "Machine learning pipelines",
    ],
    "designer": [
        "Figma advanced",
        "UI motion design",
        "UX research",
    ],
    "manager": [
        "Agile certification (Scrum)",
        "People analytics",
        "Data-driven decision-making",
    ],
    "analyst": [
        "Data storytelling",
        "Python for data",
        "Business intelligence tools",
    ],
    "tester": [
        "Automation (Selenium, Cypress)",
        "Performance testing",
        "API testing",
    ],
    # Also used as the fallback when no job title was detected at all.
    "student": [
        "Internships",
        "Portfolio projects",
        "Personal GitHub projects",
    ],
}



# ----------------------------

# Helper functions

# ----------------------------


def extract_text(file_path):
    """Return the text content of a ``.txt`` or ``.pdf`` resume.

    Args:
        file_path: Path to the resume file (anything ``pathlib.Path`` accepts).

    Returns:
        The file's text. For a PDF, the text of every page joined by a
        single space.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is neither ``.txt`` nor ``.pdf``, or if
            a ``.pdf`` is given while PyMuPDF (``fitz``) is not installed.
    """
    p = Path(file_path)
    if not p.exists():
        raise FileNotFoundError(p)

    suffix = p.suffix.lower()

    if suffix == ".txt":
        # read_text opens and closes the file itself, so no handle is leaked.
        return p.read_text(encoding="utf-8")

    if suffix == ".pdf":
        if not fitz:
            # Previously this case fell through to the generic
            # "provide a .txt or .pdf" error, which was misleading because
            # the user *did* provide a .pdf.
            raise ValueError(
                "Reading .pdf files requires PyMuPDF (fitz), which is not installed"
            )
        doc = fitz.open(file_path)
        try:
            return " ".join(page.get_text("text") for page in doc)
        finally:
            # Release the underlying file even if text extraction fails.
            doc.close()

    raise ValueError("Please provide a .txt or .pdf file")



def _parse_resume_date(token, prefer_day):
    """Turn one matched date token (e.g. ``"2018"`` or ``"Jan 2018"``) into a datetime.

    Args:
        token: The text captured by the date-range pattern.
        prefer_day: ``"first"`` for a range start, ``"last"`` for a range end.
            Decides which day (and, for a bare year, which month) is assumed
            when the token does not specify one.

    Returns:
        A ``datetime``, or ``None`` when the token cannot be interpreted or
        its year is implausible for a resume.
    """
    token = token.strip()
    year_match = re.search(r"(\d{4})$", token)
    year = int(year_match.group(1)) if year_match else None

    # Reject digit runs that are clearly not years (phone-number fragments,
    # "0000", ...). The bounds are a plausibility window for work history.
    if year is not None and not 1900 <= year <= 2100:
        return None

    def year_boundary():
        # A bare year spans the whole year: Jan 1 for starts, Dec 31 for ends.
        if prefer_day == "first":
            return datetime(year, 1, 1)
        return datetime(year, 12, 31)

    if year is not None and token == year_match.group(1):
        return year_boundary()

    # Without PREFER_DAY_OF_MONTH dateparser fills a missing day from today's
    # date, which makes the result depend on when the script is run.
    parsed = dateparser.parse(token, settings={"PREFER_DAY_OF_MONTH": prefer_day})
    if parsed is None and year is not None:
        # The pattern accepts any 3-9 letter word before the year, so the
        # word may not be a month at all (e.g. "Corp 2015"). Keep the range
        # by falling back to the year alone instead of dropping it.
        return year_boundary()
    return parsed


def extract_date_ranges(text):
    """Find date ranges in resume text.

    Recognises forms such as ``Jan 2018 - Mar 2020``, ``2015–2017`` and
    ``July 2019 to Present``. Missing parts are filled deterministically:
    a start date snaps to the first day of its month/year and an end date
    to the last day, so gap lengths do not depend on the current date.

    Args:
        text: Full resume text.

    Returns:
        A list of ``(start, end)`` tuples of ``datetime.date`` in the order
        the ranges appear in ``text``. ``Present`` resolves to today's date.
        Ranges that cannot be parsed, or whose end precedes their start,
        are omitted.
    """
    # Match patterns like: Jan 2018 - Mar 2020, 2015–2017, July 2019 to Present, etc.
    pattern = r"([A-Za-z]{3,9}\s*\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|[A-Za-z]{3,9}\s*\d{4}|\d{4})"
    today = datetime.now()

    date_pairs = []
    for start, end in re.findall(pattern, text, flags=re.IGNORECASE):
        start_dt = _parse_resume_date(start, "first")
        if re.search("present", end, re.I):
            end_dt = today
        else:
            end_dt = _parse_resume_date(end, "last")
        if not (start_dt and end_dt):
            continue

        start_day, end_day = start_dt.date(), end_dt.date()
        if end_day < start_day:
            # A range running backwards is a false positive of the pattern,
            # not a real work period.
            continue
        date_pairs.append((start_day, end_day))
    return date_pairs



def detect_jobs(text):
    """Extract potential job titles using NLP entities plus a keyword heuristic.

    Every named entity found by the module-level spaCy pipeline ``nlp`` is
    kept when it is not labelled ``ORG`` or ``WORK_OF_ART`` and its text
    contains one of the common job-title keywords (case-insensitive).

    Args:
        text: Full resume text.

    Returns:
        A list of unique title strings in order of first appearance in
        ``text``. The order is deterministic so that callers treating the
        final element as the "last" title get a stable answer.
    """
    # Compiled once per call instead of once per entity.
    title_keywords = re.compile(
        r"(developer|engineer|manager|designer|analyst|intern|tester|consultant|officer)",
        re.I,
    )
    skipped_labels = {"ORG", "WORK_OF_ART"}

    job_titles = []
    for ent in nlp(text).ents:
        if ent.label_ in skipped_labels:
            continue
        # Common title indicators
        if title_keywords.search(ent.text):
            job_titles.append(ent.text.strip())

    # dict.fromkeys de-duplicates while preserving insertion order; the
    # previous list(set(...)) returned titles in arbitrary order that could
    # change between runs.
    return list(dict.fromkeys(job_titles))



def calculate_gaps(date_pairs, min_gap_days=60):
    """Find periods not covered by any work period.

    Ranges are processed in order of start date while tracking the latest
    end date seen so far, so overlapping or nested ranges (e.g. a short
    contract held during a longer job) do not produce false gaps.

    Args:
        date_pairs: Iterable of ``(start, end)`` tuples of ``datetime.date``.
        min_gap_days: A break is reported only when it is strictly longer
            than this many days. Defaults to 60 (about two months).

    Returns:
        A list of dicts, ordered chronologically, each with keys
        ``"gap_start"``, ``"gap_end"`` (dates) and ``"gap_months"`` (gap
        length in months, using 30.4 days per month, rounded to one decimal).
        Empty when there are no ranges or no qualifying gaps.
    """
    if not date_pairs:
        return []

    days_per_month = 30.4
    ordered = sorted(date_pairs, key=lambda pair: pair[0])

    gaps = []
    covered_until = ordered[0][1]
    for start, end in ordered[1:]:
        gap_days = (start - covered_until).days
        if gap_days > min_gap_days:
            gaps.append({
                "gap_start": covered_until,
                "gap_end": start,
                "gap_months": round(gap_days / days_per_month, 1),
            })
        # Only move the coverage frontier forward: a range that ends before
        # an earlier, longer one must not pull it back.
        if end > covered_until:
            covered_until = end
    return gaps



def suggest_skills(jobs):
    """Pick skill recommendations from the final entry of ``jobs``.

    Args:
        jobs: List of job-title strings; only the last element is inspected.

    Returns:
        The ``"student"`` recommendations when ``jobs`` is empty; otherwise
        the recommendations of the first ``SKILL_UPGRADE_MAP`` keyword found
        as a substring of the lower-cased last title; otherwise a generic
        three-item list.
    """
    if not jobs:
        return SKILL_UPGRADE_MAP["student"]

    title = jobs[-1].lower()
    generic = ["Explore AI basics", "Cloud fundamentals", "Soft skill enhancement"]
    # First keyword (in map order) contained in the title wins.
    matching = (recs for keyword, recs in SKILL_UPGRADE_MAP.items() if keyword in title)
    return next(matching, generic)



# ----------------------------

# Main pipeline

# ----------------------------


def analyze_resume(file_path):
    """Analyze one resume, print a human-readable report, and return the findings.

    Args:
        file_path: Path to the resume; handed to ``extract_text``.

    Returns:
        A dict with keys ``"jobs"`` (detected titles), ``"dates"`` (work
        periods), ``"gaps"`` (detected career gaps) and ``"suggestions"``
        (skill recommendations).
    """
    resume_text = extract_text(file_path)

    work_periods = extract_date_ranges(resume_text)
    titles = detect_jobs(resume_text)
    career_gaps = calculate_gaps(work_periods)
    recommendations = suggest_skills(titles)

    # --- Report header ---
    print("\n Analyzing Resume:", file_path)
    print("=" * 60)
    titles_summary = ', '.join(titles) if titles else 'None found'
    print(f" Detected job titles: {titles_summary}")
    print(f" Work periods found: {len(work_periods)}")

    # --- Gaps ---
    if not career_gaps:
        print("\n No significant gaps detected.")
    else:
        print("\n Career Gaps Detected:")
        for gap in career_gaps:
            print(f"   - {gap['gap_start']} → {gap['gap_end']} ({gap['gap_months']} months)")

    # --- Recommendations ---
    print("\n Skill Upgrade Suggestions:")
    for recommendation in recommendations:
        print("   •", recommendation)

    # Structured result for programmatic callers.
    return {
        "jobs": titles,
        "dates": work_periods,
        "gaps": career_gaps,
        "suggestions": recommendations,
    }



# ----------------------------

# Run Example

# ----------------------------

if __name__ == "__main__":
    # Command-line entry point: takes a single positional resume path and
    # runs the full analysis on it.
    import argparse

    cli = argparse.ArgumentParser(description="Smart Resume Gap Detector")
    cli.add_argument("resume_file", help="Path to resume (.txt or .pdf)")
    analyze_resume(cli.parse_args().resume_file)