import fitz # PyMuPDF
import spacy
import re
import pandas as pd
# Load the small English spaCy pipeline once at import time; extract_keywords()
# reuses this shared object for every call.
nlp = spacy.load("en_core_web_sm")
# ---------------------------------------
# Utility: Extract text from PDF or TXT
# ---------------------------------------
def extract_text(file_path):
    """Return the text content of a PDF or plain-text file.

    Paths ending in ".pdf" (case-insensitive) are opened with PyMuPDF and the
    text of every page is concatenated in page order. Any other path is read
    as a UTF-8 encoded text file.

    Args:
        file_path: Path to the input file, as a string.

    Returns:
        The extracted text as a single string.
    """
    if file_path.lower().endswith(".pdf"):
        # The context manager closes the document (and its underlying file
        # handle) even if text extraction fails part-way through.
        with fitz.open(file_path) as pdf:
            return "".join(page.get_text() for page in pdf)

    # Anything that is not a PDF is treated as a UTF-8 text file.
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()
# ---------------------------------------
# Clean & Normalize Text
# ---------------------------------------
def clean_text(text):
    """Normalize raw text for keyword extraction.

    The text is lower-cased, every character that is not an ASCII letter,
    digit, or whitespace is replaced by a space, and each run of whitespace
    is collapsed to a single space. Leading/trailing spaces are not stripped.
    """
    lowered = text.lower()
    # Punctuation and symbols become spaces so adjacent words stay separated.
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', ' ', lowered)
    return re.sub(r'\s+', ' ', alnum_only)
# ---------------------------------------
# Extract Keywords Using spaCy
# ---------------------------------------
def extract_keywords(text):
    """Return the unique keyword lemmas found in *text*.

    The text is run through the module-level spaCy pipeline ``nlp``. A token
    contributes its lower-cased lemma when it is tagged as a noun, proper
    noun, verb, or adjective and its surface text is longer than two
    characters.

    Args:
        text: The text to analyse.

    Returns:
        A list of unique lemmas in ascending alphabetical order.
    """
    doc = nlp(text)
    keywords = {
        token.lemma_.lower()
        for token in doc
        # Keep nouns, verbs, adjectives (important for ATS); skip very short
        # tokens.
        if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and len(token.text) > 2
    }
    # Sort so the result is reproducible: iterating a set of strings yields an
    # order that can change from one interpreter run to the next.
    return sorted(keywords)
# ---------------------------------------
# ATS Scoring Logic
# ---------------------------------------
def calculate_ats_score(resume_text, jd_text):
    """Score how well a resume covers the keywords of a job description.

    Both texts are normalized with ``clean_text`` and reduced to keyword
    lists with ``extract_keywords``. The score is the percentage of job
    description keywords that also appear among the resume keywords.

    Args:
        resume_text: Raw text of the resume.
        jd_text: Raw text of the job description.

    Returns:
        A dict with the keys:
            "ats_score": percentage of matched keywords, rounded to two
                decimals (0.0 when the job description yields no keywords).
            "matched_keywords": job description keywords found in the resume.
            "missing_keywords": job description keywords not found.
            "total_keywords": number of job description keywords.
    """
    # A set makes each membership test O(1) instead of scanning a list.
    resume_keywords = set(extract_keywords(clean_text(resume_text)))
    jd_keywords = extract_keywords(clean_text(jd_text))

    # Partition the job description keywords in a single pass, preserving
    # their order in both output lists.
    matched = []
    missing = []
    for kw in jd_keywords:
        if kw in resume_keywords:
            matched.append(kw)
        else:
            missing.append(kw)

    # Guard against division by zero when no keywords were extracted.
    score = len(matched) / len(jd_keywords) * 100 if jd_keywords else 0.0
    return {
        "ats_score": round(score, 2),
        "matched_keywords": matched,
        "missing_keywords": missing,
        "total_keywords": len(jd_keywords),
    }
# ---------------------------------------
# MAIN FUNCTION
# ---------------------------------------
def ats_tool(resume_file, jobdesc_file, report_path="ats_report.csv"):
    """Score a resume against a job description, print and save a report.

    Reads both files with ``extract_text``, computes the score with
    ``calculate_ats_score``, prints a summary to stdout, and writes the
    matched/missing keyword lists to a CSV file.

    Args:
        resume_file: Path to the resume (.pdf or text file).
        jobdesc_file: Path to the job description (.pdf or text file).
        report_path: Destination of the CSV report. Defaults to
            "ats_report.csv" in the current working directory.

    Returns:
        The result dict produced by ``calculate_ats_score``.
    """
    resume_text = extract_text(resume_file)
    jd_text = extract_text(jobdesc_file)
    result = calculate_ats_score(resume_text, jd_text)
    matched = result["matched_keywords"]
    missing = result["missing_keywords"]

    print("\n ATS SCORING RESULTS")
    print("--------------------------------")
    print(f"ATS Score: {result['ats_score']}%")
    print(f"Total Keywords in Job Description: {result['total_keywords']}")
    print(f"Matched Keywords ({len(matched)}):")
    print(matched)
    print("\nMissing Keywords:")
    print(missing)

    # Export to CSV (optional). Wrapping each list in a Series lets the two
    # columns have different lengths; the shorter one is padded with NaN.
    df = pd.DataFrame({
        "Matched Keywords": pd.Series(matched),
        "Missing Keywords": pd.Series(missing),
    })
    df.to_csv(report_path, index=False)
    print(f"\n Report saved as {report_path}")
    return result
# ---------------------------------------
# RUN
# ---------------------------------------
if __name__ == "__main__":
    # Paths pasted from a file manager or shell often carry stray whitespace
    # or surrounding quotes, which would make opening the file fail; strip
    # them before use.
    resume_path = input("Enter Resume File Path (.pdf/.txt): ").strip().strip("\"'")
    jd_path = input("Enter Job Description File Path (.pdf/.txt): ").strip().strip("\"'")
    ats_tool(resume_path, jd_path)
# No comments:
# Post a Comment