# Setup (run these commands in a shell, not inside Python):
#   pip install spacy pandas dateparser
#   python -m spacy download en_core_web_sm
import re
from datetime import datetime
from pathlib import Path

import dateparser
import pandas as pd
import spacy

# Optional PDF reader (only if you want to support .pdf)
try:
    import fitz  # PyMuPDF
except ImportError:
    fitz = None
# Load the small English spaCy pipeline once at import time; detect_jobs()
# runs every resume text through this shared object.
nlp = spacy.load("en_core_web_sm")
# ----------------------------
# Skill upgrade recommendations
# ----------------------------
# Maps a lowercase keyword to the skills recommended for that kind of role.
# suggest_skills() looks for each keyword as a substring of the lowercased job
# title, in the order listed here, and uses the "student" entry when no job
# titles were detected at all.
SKILL_UPGRADE_MAP = {
    "developer": [
        "AI/ML fundamentals",
        "Cloud platforms (AWS, Azure, GCP)",
        "DevOps basics",
    ],
    "data": [
        "Data visualization (Power BI, Tableau)",
        "SQL optimization",
        "Machine learning pipelines",
    ],
    "designer": [
        "Figma advanced",
        "UI motion design",
        "UX research",
    ],
    "manager": [
        "Agile certification (Scrum)",
        "People analytics",
        "Data-driven decision-making",
    ],
    "analyst": [
        "Data storytelling",
        "Python for data",
        "Business intelligence tools",
    ],
    "tester": [
        "Automation (Selenium, Cypress)",
        "Performance testing",
        "API testing",
    ],
    "student": [
        "Internships",
        "Portfolio projects",
        "Personal GitHub projects",
    ],
}
# ----------------------------
# Helper functions
# ----------------------------
def extract_text(file_path):
    """Return the plain text of a ``.txt`` or ``.pdf`` resume.

    Args:
        file_path: Location of the resume file (``str`` or path-like). The
            extension is compared case-insensitively.

    Returns:
        The file's text. Text files are decoded as UTF-8; for PDFs the text
        of every page is joined with a single space.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is neither ``.txt`` nor ``.pdf``, or if
            a PDF is supplied but PyMuPDF (``fitz``) is not installed.
    """
    p = Path(file_path)
    if not p.exists():
        raise FileNotFoundError(p)

    suffix = p.suffix.lower()
    if suffix == ".txt":
        # read_text opens and closes the file itself, so no handle leaks.
        return p.read_text(encoding="utf-8")

    if suffix == ".pdf":
        if fitz is None:
            # Tell the user what is actually wrong instead of claiming the
            # file type is unsupported. ValueError is kept so existing
            # callers that catch it keep working.
            raise ValueError(
                "Reading .pdf files requires PyMuPDF; "
                "install it with 'pip install pymupdf'"
            )
        # The context manager closes the document even if a page fails.
        with fitz.open(str(p)) as doc:
            return " ".join(page.get_text("text") for page in doc)

    raise ValueError("Please provide a .txt or .pdf file")
# Month names, full or abbreviated. Only these words may precede a year, so
# an arbitrary word such as "Company 2018" is no longer taken as a date.
_MONTH_RE = (
    r"(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|"
    r"aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)"
)
# One endpoint of a range: optional month, then a 4-digit year in 1900-2099.
_DATE_POINT_RE = rf"(?:{_MONTH_RE}\.?\s*)?(?:19|20)\d{{2}}"
# Words that mean "this job has not ended yet".
_OPEN_ENDED = ("present", "current", "now")
# Matches e.g. "Jan 2018 - Mar 2020", "2015–2017", "July 2019 to Present".
_DATE_RANGE_RE = re.compile(
    rf"\b({_DATE_POINT_RE})\s*(?:-|to|–|—)\s*({'|'.join(_OPEN_ENDED)}|{_DATE_POINT_RE})\b",
    flags=re.IGNORECASE,
)


def _parse_range_endpoint(raw, *, is_end):
    """Turn one matched endpoint string into a ``datetime``, or ``None``.

    A bare year becomes 1 January (start) or 31 December (end). A month-year
    string is handed to ``dateparser`` with the missing day pinned to the
    first (start) or last (end) day of the month, so the result does not
    depend on the day the script is run.
    """
    raw = raw.strip()
    if raw.isdigit():
        year = int(raw)
        return datetime(year, 12, 31) if is_end else datetime(year, 1, 1)
    return dateparser.parse(
        raw,
        settings={"PREFER_DAY_OF_MONTH": "last" if is_end else "first"},
    )


def extract_date_ranges(text):
    """Find employment date ranges in resume text.

    Recognises ranges such as ``Jan 2018 - Mar 2020``, ``2015–2017`` and
    ``July 2019 to Present``. The separator may be ``-``, ``–``, ``—`` or
    ``to``; the end may be ``Present``, ``Current`` or ``Now`` (any case),
    which is resolved to today's date.

    Args:
        text: Resume text to scan.

    Returns:
        A list of ``(start, end)`` tuples of ``datetime.date`` objects, in the
        order the ranges appear in ``text``. Ranges whose endpoints cannot be
        parsed are skipped.
    """
    date_pairs = []
    for start, end in _DATE_RANGE_RE.findall(text):
        start_date = _parse_range_endpoint(start, is_end=False)
        if end.lower() in _OPEN_ENDED:
            end_date = datetime.now()
        else:
            end_date = _parse_range_endpoint(end, is_end=True)
        if start_date and end_date:
            date_pairs.append((start_date.date(), end_date.date()))
    return date_pairs
# Keywords that mark an entity as a likely job title; compiled once.
_JOB_TITLE_RE = re.compile(
    r"(developer|engineer|manager|designer|analyst|intern|tester|consultant|officer)",
    re.IGNORECASE,
)


def detect_jobs(text):
    """Extract potential job titles using spaCy entities plus a keyword check.

    Every named entity found by the module-level ``nlp`` pipeline is kept if
    it is not labelled ``ORG`` or ``WORK_OF_ART`` and contains one of the
    job-title keywords (case-insensitive).

    Args:
        text: Resume text to analyse.

    Returns:
        A list of unique title strings in order of first appearance in
        ``text``. The order is deterministic, which matters because
        ``suggest_skills`` reads the last element.
    """
    doc = nlp(text)
    # A dict keeps insertion order while discarding duplicates; a set would
    # make the result order vary between runs.
    unique_titles = {}
    for ent in doc.ents:
        if ent.label_ in ("ORG", "WORK_OF_ART"):
            continue
        if _JOB_TITLE_RE.search(ent.text):
            unique_titles.setdefault(ent.text.strip(), None)
    return list(unique_titles)
def calculate_gaps(date_pairs, min_gap_days=60):
    """Find periods of more than ``min_gap_days`` not covered by any job.

    Ranges are processed in order of start date while tracking the latest end
    date seen so far. A gap is therefore only reported when *no* earlier range
    covers it, even if a short job sits inside a longer, overlapping one.

    Args:
        date_pairs: Iterable of ``(start, end)`` tuples of ``datetime.date``
            (or any values supporting subtraction to a ``timedelta``).
        min_gap_days: A break must be strictly longer than this many days to
            count as a gap. Defaults to 60 (about two months).

    Returns:
        A list of dicts with keys ``gap_start``, ``gap_end`` and
        ``gap_months`` (gap length in days divided by 30.4, rounded to one
        decimal), ordered by gap start. Empty if there are no gaps.
    """
    if not date_pairs:
        return []

    ordered = sorted(date_pairs, key=lambda pair: pair[0])
    gaps = []
    covered_until = ordered[0][1]
    for start, end in ordered[1:]:
        gap_days = (start - covered_until).days
        if gap_days > min_gap_days:
            gaps.append({
                "gap_start": covered_until,
                "gap_end": start,
                "gap_months": round(gap_days / 30.4, 1),
            })
        # Only move the coverage marker forward; a job that ends before an
        # earlier, longer job must not pull it back.
        if end > covered_until:
            covered_until = end
    return gaps
def suggest_skills(jobs):
    """Suggest skills to learn based on the last job title in ``jobs``.

    The last title is lowercased and checked against each keyword of
    ``SKILL_UPGRADE_MAP`` in turn; the first keyword found as a substring
    decides the recommendations.

    Args:
        jobs: List of job-title strings; may be empty.

    Returns:
        A new list of recommendation strings: the ``"student"`` entry when
        ``jobs`` is empty, the matching entry when a keyword is found, or a
        generic fallback otherwise. The list is always a copy, so callers may
        modify it without altering ``SKILL_UPGRADE_MAP``.
    """
    if not jobs:
        return list(SKILL_UPGRADE_MAP["student"])

    last_job = jobs[-1].lower()
    for key, recs in SKILL_UPGRADE_MAP.items():
        if key in last_job:
            return list(recs)
    return ["Explore AI basics", "Cloud fundamentals", "Soft skill enhancement"]
# ----------------------------
# Main pipeline
# ----------------------------
def analyze_resume(file_path):
    """Run the full analysis on one resume and print a report.

    Reads the resume text, extracts date ranges and job titles, derives
    career gaps and skill suggestions, and prints a summary to stdout.

    Args:
        file_path: Path to the resume file, passed on to ``extract_text``.

    Returns:
        A dict with keys ``"jobs"``, ``"dates"``, ``"gaps"`` and
        ``"suggestions"`` holding the values computed during the analysis.
    """
    resume_text = extract_text(file_path)
    work_periods = extract_date_ranges(resume_text)
    titles = detect_jobs(resume_text)
    career_gaps = calculate_gaps(work_periods)
    recommendations = suggest_skills(titles)

    # Report header and summary counts.
    titles_summary = ", ".join(titles) if titles else "None found"
    print("\n Analyzing Resume:", file_path)
    print("=" * 60)
    print(f" Detected job titles: {titles_summary}")
    print(f" Work periods found: {len(work_periods)}")

    # Gap section.
    if not career_gaps:
        print("\n No significant gaps detected.")
    else:
        print("\n Career Gaps Detected:")
        for gap in career_gaps:
            print(f" - {gap['gap_start']} → {gap['gap_end']} ({gap['gap_months']} months)")

    # Recommendation section.
    print("\n Skill Upgrade Suggestions:")
    for suggestion in recommendations:
        print(" •", suggestion)

    # Structured result for programmatic callers.
    return {
        "jobs": titles,
        "dates": work_periods,
        "gaps": career_gaps,
        "suggestions": recommendations,
    }
# ----------------------------
# Run Example
# ----------------------------
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the resume
    # path, and prints the analysis report for it.
    import argparse

    cli = argparse.ArgumentParser(description="Smart Resume Gap Detector")
    cli.add_argument("resume_file", help="Path to resume (.txt or .pdf)")
    options = cli.parse_args()
    analyze_resume(options.resume_file)