"""Resume Parser & Analyzer.

Setup (run once in a shell before using this module):

    pip install spacy pdfminer.six python-docx pandas nltk
    python -m spacy download en_core_web_sm
"""


import re

import spacy

import pdfminer.high_level

import docx

import nltk

from collections import Counter


# Fetch the NLTK "stopwords" corpus when this module is imported, so the
# corpus reader imported on the next statement has data available when used.
nltk.download("stopwords")

from nltk.corpus import stopwords


# Load spaCy NLP model
# Loaded once at module level; extract_name() reuses this shared pipeline
# instead of loading the model on every call.
nlp = spacy.load("en_core_web_sm")



# Function to extract text from PDF

def extract_text_from_pdf(pdf_path):
    """Return the text that pdfminer extracts from the PDF at ``pdf_path``."""
    pdf_text = pdfminer.high_level.extract_text(pdf_path)
    return pdf_text



# Function to extract text from DOCX

def extract_text_from_docx(docx_path):
    """Return the paragraph text of the DOCX at ``docx_path``.

    Paragraphs are joined with a newline, in document order.
    """
    document = docx.Document(docx_path)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)



# Function to extract email from text

def extract_email(text):
    """Return the first e-mail address found in ``text``, or ``None`` if none."""
    first_hit = re.search(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        text,
    )
    if first_hit is None:
        return None
    return first_hit.group(0)



# Function to extract phone number from text

def extract_phone(text):
    """Return the first 10-digit phone number found in ``text``, or ``None``.

    Accepted shapes are ``5551234567``, ``555-123-4567``, ``555.123.4567``,
    ``555 123 4567`` and ``(555) 123-4567``.  The matched substring is
    returned exactly as it appears in ``text``.

    The match must not be directly preceded or followed by another digit,
    so a 10-digit slice of a longer number (an account number, an ID, a
    timestamp) is no longer reported as a phone number.
    """
    # (?<!\d) / (?!\d): digit boundaries around the original pattern.
    phone_pattern = r"(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)"

    match = re.search(phone_pattern, text)
    return match.group(0) if match else None



# Function to extract skills from text

def extract_skills(text, skills_list=None):
    """Return the skills from ``skills_list`` that are mentioned in ``text``.

    Args:
        text: Resume text to search.
        skills_list: Optional iterable of skill names to look for.  When
            omitted, the built-in default list is used.

    Returns:
        A list of the matching skill names, spelled as in ``skills_list``
        and in the same order.

    Matching is case-insensitive and works on whole terms rather than raw
    substrings, so "Java" is not reported for "JavaScript" and "SQL" is not
    reported for "MySQL".  Words of a multi-word skill may be separated by
    any run of whitespace, which keeps "Machine\\nLearning" (a common result
    of PDF line wrapping) detectable.
    """
    if skills_list is None:
        skills_list = ["Python", "Java", "C++", "Machine Learning", "Data Science", "SQL", "Django", "React", "Flask"]

    found_skills = []
    for skill in skills_list:
        words = skill.split()
        if not words:
            continue  # blank entry: nothing to search for

        core = r"\s+".join(re.escape(word) for word in words)

        # Only guard an edge that is a word character: a skill such as
        # "C++" must still match inside "C++17", while "Java" must not
        # match inside "JavaScript".  "+" and "#" are also rejected after
        # a word edge so that a skill "C" does not match "C++" or "C#".
        first_char, last_char = words[0][0], words[-1][-1]
        prefix = r"(?<!\w)" if (first_char.isalnum() or first_char == "_") else ""
        suffix = r"(?![\w+#])" if (last_char.isalnum() or last_char == "_") else ""

        if re.search(prefix + core + suffix, text, re.IGNORECASE):
            found_skills.append(skill)

    return found_skills



# Function to extract name using NLP

def extract_name(text):
    """Return the text of the first entity spaCy labels ``PERSON``, else ``None``."""
    entities = nlp(text).ents
    person_names = (entity.text for entity in entities if entity.label_ == "PERSON")
    return next(person_names, None)



# Function to match skills with a job description

def match_skills(resume_skills, job_description):
    """Return the percentage of ``resume_skills`` mentioned in the job text.

    Args:
        resume_skills: List of skill names found in the resume.
        job_description: Free-text job description.

    Returns:
        ``matched / len(resume_skills) * 100`` rounded to two decimals, or
        ``0`` when ``resume_skills`` is empty.

    Each skill is searched for as a whole phrase in the job description
    (case-insensitive, any whitespace between words).  The previous
    implementation compared the full skill string against single-word
    tokens, so multi-word skills such as "Machine Learning" could never
    match; it also relied on ``nltk.word_tokenize``, which needs tokenizer
    data this file does not download.  Stop-word filtering is unnecessary
    for phrase search and has been dropped.
    """
    if not resume_skills:
        return 0

    skill_match_count = 0
    for skill in resume_skills:
        words = skill.split()
        if not words:
            continue  # blank skill can never be "mentioned"

        core = r"\s+".join(re.escape(word) for word in words)

        # Guard only word-character edges so "C++" still matches "C++17"
        # while "Java" does not match "JavaScript".
        first_char, last_char = words[0][0], words[-1][-1]
        prefix = r"(?<!\w)" if (first_char.isalnum() or first_char == "_") else ""
        suffix = r"(?![\w+#])" if (last_char.isalnum() or last_char == "_") else ""

        if re.search(prefix + core + suffix, job_description, re.IGNORECASE):
            skill_match_count += 1

    match_percentage = (skill_match_count / len(resume_skills)) * 100
    return round(match_percentage, 2)



# Main function

def analyze_resume(file_path, job_description):
    """Parse a resume file, print a summary and return the extracted fields.

    Args:
        file_path: Path to the resume.  A path ending in ``.pdf`` (any
            letter case) is read as PDF; every other path is read as DOCX.
        job_description: Job description text used to score skill overlap.

    Returns:
        A dict with the keys ``name``, ``email``, ``phone``, ``skills`` and
        ``match_percentage``.
    """
    # Extract text.  The extension check is case-insensitive so that files
    # such as "CV.PDF" are not sent to the DOCX reader by mistake.
    if file_path.lower().endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    else:
        text = extract_text_from_docx(file_path)

    # Extract details
    name = extract_name(text)
    email = extract_email(text)
    phone = extract_phone(text)
    skills = extract_skills(text)
    match_percentage = match_skills(skills, job_description)

    # Display results.  The emoji labels were mis-encoded (UTF-8 bytes
    # decoded with a legacy code page) and are restored here.
    print("\n📄 Resume Analysis Results:")
    print(f"👤 Name: {name}")
    print(f"📧 Email: {email}")
    print(f"📞 Phone: {phone}")
    print(f"🛠 Skills: {', '.join(skills)}")
    print(f"✅ Skill Match with Job: {match_percentage}%")

    return {"name": name, "email": email, "phone": phone, "skills": skills, "match_percentage": match_percentage}


# No comments: