import fitz # PyMuPDF for PDF parsing
import docx2txt
import spacy
import re
from collections import Counter
import tkinter as tk
from tkinter import filedialog, messagebox
# Load the small English spaCy pipeline once, at import time. extract_keywords()
# calls this shared instance to tokenize every text it is given.
nlp = spacy.load("en_core_web_sm")
# Example job description used as the reference text: upload_file() passes it
# to score_resume(), which compares its keywords with those of the resume.
job_description = """
We are looking for a Data Scientist with expertise in Python, Machine Learning, and Data Analysis.
Candidates must have experience with Pandas, NumPy, and Scikit-learn.
Strong communication and teamwork skills are required.
"""
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # The context manager closes the document (and its file handle) even
    # when text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
# Function to extract text from DOCX
def extract_text_from_docx(docx_path):
    """Return the text that docx2txt extracts from the file at *docx_path*.

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        Whatever ``docx2txt.process`` returns for that file.
    """
    return docx2txt.process(docx_path)
# Function to clean and preprocess text
def clean_text(text):
    """Collapse whitespace runs to single spaces and lowercase the text.

    Leading or trailing whitespace is reduced to one space, not removed.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized, lowercased text.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.lower()
# Function to extract keywords using NLP
def extract_keywords(text):
    """Count the alphabetic, non-stop-word tokens in *text*.

    Tokens are lowercased before counting so that keyword counts from two
    texts can be compared regardless of capitalization; otherwise "Python"
    in one text would never match "python" in another.

    Args:
        text: The text to tokenize with the module-level spaCy pipeline.

    Returns:
        A Counter mapping each lowercased keyword to its occurrence count.
    """
    doc = nlp(text)
    return Counter(
        token.lower_
        for token in doc
        if token.is_alpha and not token.is_stop
    )
# Function to score the resume
def score_resume(resume_text, job_description):
    """Score a resume against a job description.

    Args:
        resume_text: Text of the resume.
        job_description: Text of the job posting to match against.

    Returns:
        A tuple ``(final_score, keyword_score, readability_score)``.
        ``keyword_score`` is the percentage of job-description keyword
        occurrences also found in the resume. ``readability_score`` is twice
        the average number of words per sentence, capped at 100.
        ``final_score`` is their 70/30 weighted average rounded to two
        decimals.
    """
    resume_keywords = extract_keywords(resume_text)
    job_keywords = extract_keywords(job_description)

    # Counter intersection keeps the minimum count per keyword, so a keyword
    # repeated in the resume never counts more often than the job text uses it.
    matched_keywords = sum((resume_keywords & job_keywords).values())
    total_keywords = sum(job_keywords.values())
    keyword_score = (matched_keywords / total_keywords) * 100 if total_keywords else 0

    # Simplified readability measure: words per sentence, doubled. The "+ 1"
    # avoids division by zero for text without sentence punctuation. The cap
    # keeps this term, and therefore the final score, within 0-100, since both
    # are presented to the user as percentages.
    sentence_count = len(re.findall(r"[.!?]", resume_text))
    word_count = len(resume_text.split())
    readability_score = min((word_count / (sentence_count + 1)) * 2, 100.0)

    # Weighted average: 70% keyword match, 30% readability.
    final_score = (keyword_score * 0.7) + (readability_score * 0.3)
    return round(final_score, 2), keyword_score, readability_score
# GUI for File Upload
def upload_file():
    """Ask the user for a resume file, score it, and show the result.

    Does nothing when the file dialog is cancelled. Shows an error dialog
    when the file extension is neither .pdf nor .docx, or when the text of
    the file cannot be extracted.
    """
    file_path = filedialog.askopenfilename(
        filetypes=[("PDF Files", "*.pdf"), ("Word Files", "*.docx")]
    )
    if not file_path:
        return  # The dialog was cancelled.

    # Compare the extension case-insensitively so "Resume.PDF" is accepted.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        extractor = extract_text_from_pdf
    elif lowered_path.endswith(".docx"):
        extractor = extract_text_from_docx
    else:
        messagebox.showerror("Error", "Unsupported file format!")
        return

    # GUI boundary: an exception escaping a Tkinter callback is only printed
    # to stderr, so report unreadable or corrupt files to the user instead.
    try:
        resume_text = extractor(file_path)
    except Exception as exc:
        messagebox.showerror("Error", f"Could not read the file:\n{exc}")
        return

    # Clean and score resume
    cleaned_resume = clean_text(resume_text)
    final_score, keyword_score, readability_score = score_resume(
        cleaned_resume, job_description
    )

    # Show results
    messagebox.showinfo(
        "Resume Score",
        f"📄 Resume Score: {final_score}%\n\n"
        f"🔑 Keyword Match: {keyword_score:.2f}%\n"
        f"📖 Readability Score: {readability_score:.2f}%",
    )
# Build the main window: a single button that starts the upload-and-score flow.
root = tk.Tk()
root.geometry("300x200")
root.title("AI Resume Scorer")
upload_btn = tk.Button(
    root,
    text="Upload Resume",
    command=upload_file,
    padx=10,
    pady=5,
)
upload_btn.pack(pady=20)
# Hand control to Tk's event loop; this call blocks until the window closes.
root.mainloop()
# No comments:
# Post a Comment