# Resume ATS Scoring Tool

import fitz  # PyMuPDF

import spacy

import re

import pandas as pd


# Load the spaCy "en_core_web_sm" English pipeline once, at import time, so
# that extract_keywords() can reuse the same instance on every call.
nlp = spacy.load("en_core_web_sm")


# ---------------------------------------

# Utility: Extract text from PDF or TXT

# ---------------------------------------

def extract_text(file_path):
    """Return the full text content of a resume or job-description file.

    A path ending in ``.pdf`` (case-insensitive) is opened with PyMuPDF and
    the text of every page is concatenated in page order.  Any other path is
    read as a UTF-8 plain-text file.

    Args:
        file_path: Location of the input file, given as a ``str`` or as an
            object whose ``str()`` is the path (for example ``pathlib.Path``).

    Returns:
        The extracted text as a single string.

    Raises:
        OSError: If the plain-text file cannot be opened.
        UnicodeDecodeError: If a plain-text file is not valid UTF-8.
    """
    path = str(file_path)

    if path.lower().endswith(".pdf"):
        # The context manager closes the document (and its underlying file
        # handle) even if extraction fails part-way through.
        with fitz.open(path) as pdf:
            return "".join(page.get_text() for page in pdf)

    # Everything that is not a PDF is treated as a UTF-8 text file.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


# ---------------------------------------

# Clean & Normalize Text

# ---------------------------------------

def clean_text(text):
    """Normalize *text* for keyword comparison.

    The text is lower-cased, every character that is not an ASCII letter,
    a digit or whitespace is replaced by a space, and each run of
    whitespace is collapsed to a single space.  The result is not stripped,
    so a single leading or trailing space may remain.

    Args:
        text: Raw text to normalize.

    Returns:
        The normalized string.
    """
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', ' ', text.lower())
    return re.sub(r'\s+', ' ', alnum_only)


# ---------------------------------------

# Extract Keywords Using spaCy

# ---------------------------------------

def extract_keywords(text):
    """Extract the unique keyword lemmas from *text*.

    The text is run through the module-level spaCy pipeline ``nlp``.  A token
    contributes its lower-cased lemma when it is tagged as a noun, proper
    noun, verb or adjective and its surface text is longer than two
    characters.

    Args:
        text: Text to analyse.

    Returns:
        A list of unique lemmas in order of first appearance, so that
        repeated runs on the same text produce the same list.
    """
    content_pos = {"NOUN", "PROPN", "VERB", "ADJ"}

    lemmas = (
        token.lemma_.lower()
        for token in nlp(text)
        if token.pos_ in content_pos and len(token.text) > 2
    )

    # dict.fromkeys removes duplicates while keeping insertion order; a plain
    # set() would yield an order that changes between interpreter runs.
    return list(dict.fromkeys(lemmas))


# ---------------------------------------

# ATS Scoring Logic

# ---------------------------------------

def calculate_ats_score(resume_text, jd_text):
    """Score how well a resume covers the keywords of a job description.

    Both texts are normalized with ``clean_text`` and reduced to keyword
    lists with ``extract_keywords``.  The score is the percentage of
    job-description keywords that also occur among the resume keywords.

    Args:
        resume_text: Raw text of the resume.
        jd_text: Raw text of the job description.

    Returns:
        A dict with the keys:
            ``"ats_score"``: match percentage rounded to two decimals
                (``0`` when the job description yields no keywords).
            ``"matched_keywords"``: job-description keywords found in the
                resume, in job-description keyword order.
            ``"missing_keywords"``: job-description keywords not found in
                the resume, in job-description keyword order.
            ``"total_keywords"``: number of job-description keywords.
    """
    resume_clean = clean_text(resume_text)
    jd_clean = clean_text(jd_text)

    # A set gives O(1) membership tests; only membership of resume keywords
    # is ever needed, so their order is irrelevant here.
    resume_keywords = set(extract_keywords(resume_clean))
    jd_keywords = extract_keywords(jd_clean)

    matched = [kw for kw in jd_keywords if kw in resume_keywords]
    missing = [kw for kw in jd_keywords if kw not in resume_keywords]

    # Guard against division by zero when the job description is empty.
    score = (len(matched) / len(jd_keywords)) * 100 if jd_keywords else 0

    return {
        "ats_score": round(score, 2),
        "matched_keywords": matched,
        "missing_keywords": missing,
        "total_keywords": len(jd_keywords),
    }


# ---------------------------------------

# MAIN FUNCTION

# ---------------------------------------

def ats_tool(resume_file, jobdesc_file, report_path="ats_report.csv"):
    """Score a resume against a job description, print and export the result.

    Reads both files with ``extract_text``, scores them with
    ``calculate_ats_score``, prints a summary to stdout and writes the
    matched and missing keywords to a two-column CSV file.

    Args:
        resume_file: Path to the resume (.pdf or .txt).
        jobdesc_file: Path to the job description (.pdf or .txt).
        report_path: Destination of the CSV report.  Defaults to
            ``"ats_report.csv"`` in the current working directory.

    Returns:
        None.
    """
    resume_text = extract_text(resume_file)
    jd_text = extract_text(jobdesc_file)

    result = calculate_ats_score(resume_text, jd_text)
    matched = result["matched_keywords"]
    missing = result["missing_keywords"]

    print("\n ATS SCORING RESULTS")
    print("--------------------------------")
    print(f"ATS Score: {result['ats_score']}%")
    print(f"Total Keywords in Job Description: {result['total_keywords']}")
    print(f"Matched Keywords ({len(matched)}):")
    print(matched)
    print("\nMissing Keywords:")
    print(missing)

    # Wrapping each list in a Series lets the two columns have different
    # lengths; pandas pads the shorter one with NaN.
    df = pd.DataFrame({
        "Matched Keywords": pd.Series(matched),
        "Missing Keywords": pd.Series(missing),
    })
    df.to_csv(report_path, index=False)
    print(f"\n Report saved as {report_path}")


# ---------------------------------------

# RUN

# ---------------------------------------

if __name__ == "__main__":
    # Pasted or drag-and-dropped paths often carry stray whitespace or
    # surrounding quotes; remove them so the file can be found and the
    # ".pdf" extension check sees the real end of the path.
    resume_path = input("Enter Resume File Path (.pdf/.txt): ").strip().strip("\"'")
    jd_path = input("Enter Job Description File Path (.pdf/.txt): ").strip().strip("\"'")

    ats_tool(resume_path, jd_path)


# No comments: