diff --git a/.github/workflows/backend-lint.yml b/.github/workflows/backend-lint.yml new file mode 100644 index 0000000..8498d3f --- /dev/null +++ b/.github/workflows/backend-lint.yml @@ -0,0 +1,44 @@ +name: Backend Python Lint + +# 1: Trigger only when python files are changed +on: + push: + branches: ["main"] + paths: + - "backend/**/*.py" + - ".github/workflows/backend-lint.yml" + pull_request: + branches: ["main"] + paths: + - "backend/**/*.py" + - ".github/workflows/backend-lint.yml" + +jobs: + lint-backend: + runs-on: ubuntu-latest + + steps: + - name: Checkout the repository code + uses: actions/checkout@v4 + +# 2: Pin exact python version requested + - name: Setup python 3.10.12 + uses: actions/setup-python@v5 + with: + python-version: "3.10.12" + + - name: Install Linting tools + run: | + python -m pip install --upgrade pip + pip install black flake8 + +# 3: Check code formatting + - name: Formatting with Black + run: | + # --check means "dont fix it, just fail the CI if it's formatted wrong" + black . --check + +# 4: Check for syntax errors and undefined names + - name: Linting with Flake8 + run: | + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics \ No newline at end of file diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000..09a50a7 --- /dev/null +++ b/backend/README.md @@ -0,0 +1,42 @@ +# Backend Documentation + +## Overview +FastAPI-based backend for The Method application. Handles resume analysis, interview preparation, job applications, and AI-powered recommendations using LLM integration. + +## Setup + +### Prerequisites +- Python 3.12.3+ +- PostgreSQL w/ SqlAlchemy +- GROQ API key +- Google Developer key(future) + +### Installation +```bash +python3 -m venv .venv +source .venv/bin/activate +python -m pip install --upgrade pip +pip install -r requirements.txt +pip install -r requirements-dev.txt +``` + +### Formatting Check +```bash +black . 
--check +``` + +### Auto Formatting Python Code +```bash +black . +``` + +#### Check for errors +```bash +flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics +``` + +**Selected error codes:** +- `E9` = Syntax errors and I/O errors (file cannot be read or parsed) +- `F63` = Invalid comparisons and assertions (e.g. `is` with a literal, `assert` on a tuple) +- `F7` = Misplaced statements (e.g. `break`/`continue` outside a loop, `return`/`yield` outside a function) +- `F82` = Undefined names (including names listed in `__all__`) \ No newline at end of file diff --git a/backend/database/__init__.py b/backend/database/__init__.py index 3710d15..9df57ec 100644 --- a/backend/database/__init__.py +++ b/backend/database/__init__.py @@ -1,2 +1,12 @@ from .engine import get_db, init_db -from .models import User, UserInfo, Education, Skill, Experience, Project, Certification, Award, Resume \ No newline at end of file +from .models import ( + User, + UserInfo, + Education, + Skill, + Experience, + Project, + Certification, + Award, + Resume, +) diff --git a/backend/database/engine.py b/backend/database/engine.py index 9fde8f4..3a303ac 100644 --- a/backend/database/engine.py +++ b/backend/database/engine.py @@ -4,26 +4,30 @@ from .models import Base -PSYCOPG_DATABASE_URL = os.getenv('DATABASE_URL').replace('postgresql://', 'postgresql+psycopg2://') +PSYCOPG_DATABASE_URL = os.getenv("DATABASE_URL").replace( + "postgresql://", "postgresql+psycopg2://" +) engine = create_engine(PSYCOPG_DATABASE_URL) SessionLocal = sessionmaker(bind=engine, autoflush=False) + def get_db(): - """ - Returns a database session - """ - db = SessionLocal() - try: - yield db - finally: - db.close() + """ + Returns a database session + """ + db = SessionLocal() + try: + yield db + finally: + db.close() + def init_db(): - """ - Initializes the database - """ - with engine.begin() as conn: - conn.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";')) - Base.metadata.create_all(bind=engine) \ No newline at end of file + """ + Initializes the database + """ + with engine.begin() as conn: + conn.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";')) + 
Base.metadata.create_all(bind=engine) diff --git a/backend/database/models.py b/backend/database/models.py index 78a77d2..c2ae862 100644 --- a/backend/database/models.py +++ b/backend/database/models.py @@ -4,7 +4,12 @@ from typing import List, Optional from sqlalchemy import Boolean, Numeric, Date, Text, ForeignKey, func, text -from sqlalchemy.dialects.postgresql import UUID as PG_UUID, JSONB as PG_JSONB, ARRAY as PG_ARRAY, ENUM as PG_ENUM +from sqlalchemy.dialects.postgresql import ( + UUID as PG_UUID, + JSONB as PG_JSONB, + ARRAY as PG_ARRAY, + ENUM as PG_ENUM, +) from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship @@ -13,52 +18,76 @@ class Base(DeclarativeBase): class JobType(Enum): - FULL_TIME = 'Full-time' - PART_TIME = 'Part-time' - CONTRACT = 'Contract' - INTERNSHIP = 'Internship' - FREELANCE = 'Freelance' - VOLUNTEER = 'Volunteer' + FULL_TIME = "Full-time" + PART_TIME = "Part-time" + CONTRACT = "Contract" + INTERNSHIP = "Internship" + FREELANCE = "Freelance" + VOLUNTEER = "Volunteer" class User(Base): - __tablename__ = 'users' + __tablename__ = "users" - id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, server_default=text('uuid_generate_v4()')) + id: Mapped[UUID] = mapped_column( + PG_UUID(as_uuid=True), + primary_key=True, + server_default=text("uuid_generate_v4()"), + ) email: Mapped[str] = mapped_column(Text, unique=True, nullable=False) password_hash: Mapped[str] = mapped_column(Text, nullable=False) created_at: Mapped[datetime] = mapped_column(server_default=func.now()) last_email_update: Mapped[datetime] = mapped_column(server_default=func.now()) last_password_update: Mapped[datetime] = mapped_column(server_default=func.now()) - user_info: Mapped['UserInfo'] = relationship(back_populates='user', uselist=False, cascade='all, delete-orphan') - resumes: Mapped[List['Resume']] = relationship(back_populates='user', cascade='all, delete-orphan') + user_info: Mapped["UserInfo"] = relationship( + 
back_populates="user", uselist=False, cascade="all, delete-orphan" + ) + resumes: Mapped[List["Resume"]] = relationship( + back_populates="user", cascade="all, delete-orphan" + ) class UserInfo(Base): - __tablename__ = 'user_infos' + __tablename__ = "user_infos" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_id: Mapped[UUID] = mapped_column(ForeignKey('users.id', ondelete='CASCADE'), unique=True, nullable=False) + user_id: Mapped[UUID] = mapped_column( + ForeignKey("users.id", ondelete="CASCADE"), unique=True, nullable=False + ) name: Mapped[Optional[str]] = mapped_column(Text) phone_number: Mapped[Optional[str]] = mapped_column(Text) - us_citizen: Mapped[bool] = mapped_column(Boolean, server_default=text('false')) - links: Mapped[list] = mapped_column(PG_JSONB, server_default=text('\'[]\'::jsonb')) - - user: Mapped['User'] = relationship(back_populates='user_info') - educations: Mapped[List['Education']] = relationship(back_populates='user_info', cascade='all, delete-orphan') - skills: Mapped[List['Skill']] = relationship(back_populates='user_info', cascade='all, delete-orphan') - experiences: Mapped[List['Experience']] = relationship(back_populates='user_info', cascade='all, delete-orphan') - projects: Mapped[List['Project']] = relationship(back_populates='user_info', cascade='all, delete-orphan') - certifications: Mapped[List['Certification']] = relationship(back_populates='user_info', cascade='all, delete-orphan') - awards: Mapped[List['Award']] = relationship(back_populates='user_info', cascade='all, delete-orphan') + us_citizen: Mapped[bool] = mapped_column(Boolean, server_default=text("false")) + links: Mapped[list] = mapped_column(PG_JSONB, server_default=text("'[]'::jsonb")) + + user: Mapped["User"] = relationship(back_populates="user_info") + educations: Mapped[List["Education"]] = relationship( + back_populates="user_info", cascade="all, delete-orphan" + ) + skills: Mapped[List["Skill"]] = relationship( + 
back_populates="user_info", cascade="all, delete-orphan" + ) + experiences: Mapped[List["Experience"]] = relationship( + back_populates="user_info", cascade="all, delete-orphan" + ) + projects: Mapped[List["Project"]] = relationship( + back_populates="user_info", cascade="all, delete-orphan" + ) + certifications: Mapped[List["Certification"]] = relationship( + back_populates="user_info", cascade="all, delete-orphan" + ) + awards: Mapped[List["Award"]] = relationship( + back_populates="user_info", cascade="all, delete-orphan" + ) class Education(Base): - __tablename__ = 'educations' + __tablename__ = "educations" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) school: Mapped[str] = mapped_column(Text, nullable=False) major: Mapped[str] = mapped_column(Text, nullable=False) gpa: Mapped[float] = mapped_column(Numeric(3, 2)) @@ -67,83 +96,103 @@ class Education(Base): activities: Mapped[Optional[list[str]]] = mapped_column(PG_ARRAY(Text)) coursework: Mapped[Optional[list[str]]] = mapped_column(PG_ARRAY(Text)) - user_info: Mapped['UserInfo'] = relationship(back_populates='educations') + user_info: Mapped["UserInfo"] = relationship(back_populates="educations") class Skill(Base): - __tablename__ = 'skills' + __tablename__ = "skills" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) category_name: Mapped[str] = mapped_column(Text, nullable=False) skill_list: Mapped[list[str]] = mapped_column(PG_ARRAY(Text), nullable=False) - user_info: Mapped['UserInfo'] = 
relationship(back_populates='skills') + user_info: Mapped["UserInfo"] = relationship(back_populates="skills") class Experience(Base): - __tablename__ = 'experiences' + __tablename__ = "experiences" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) title: Mapped[str] = mapped_column(Text, nullable=False) company: Mapped[str] = mapped_column(Text, nullable=False) city: Mapped[str] = mapped_column(Text, nullable=False) state: Mapped[str] = mapped_column(Text, nullable=False) - job_type: Mapped[JobType] = mapped_column(PG_ENUM(JobType, name='job_type_enum'), nullable=False) + job_type: Mapped[JobType] = mapped_column( + PG_ENUM(JobType, name="job_type_enum"), nullable=False + ) start_date: Mapped[date] = mapped_column(Date, nullable=False) end_date: Mapped[Optional[date]] = mapped_column(Date) description: Mapped[Optional[str]] = mapped_column(Text) - user_info: Mapped['UserInfo'] = relationship(back_populates='experiences') + user_info: Mapped["UserInfo"] = relationship(back_populates="experiences") class Project(Base): - __tablename__ = 'projects' + __tablename__ = "projects" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) name: Mapped[str] = mapped_column(Text, nullable=False) start_date: Mapped[date] = mapped_column(Date, nullable=False) end_date: Mapped[Optional[date]] = mapped_column(Date) description: Mapped[Optional[str]] = mapped_column(Text) - user_info: Mapped['UserInfo'] = relationship(back_populates='projects') + user_info: Mapped["UserInfo"] = 
relationship(back_populates="projects") class Certification(Base): - __tablename__ = 'certifications' + __tablename__ = "certifications" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) name: Mapped[str] = mapped_column(Text, nullable=False) company: Mapped[str] = mapped_column(Text, nullable=False) date_issued: Mapped[date] = mapped_column(Date, nullable=False) - user_info: Mapped['UserInfo'] = relationship(back_populates='certifications') + user_info: Mapped["UserInfo"] = relationship(back_populates="certifications") class Award(Base): - __tablename__ = 'awards' + __tablename__ = "awards" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) - user_info_id: Mapped[int] = mapped_column(ForeignKey('user_infos.id', ondelete='CASCADE'), nullable=False) + user_info_id: Mapped[int] = mapped_column( + ForeignKey("user_infos.id", ondelete="CASCADE"), nullable=False + ) name: Mapped[str] = mapped_column(Text, nullable=False) company: Mapped[str] = mapped_column(Text, nullable=False) city: Mapped[Optional[str]] = mapped_column(Text) state: Mapped[Optional[str]] = mapped_column(Text) date_received: Mapped[date] = mapped_column(Date, nullable=False) - user_info: Mapped['UserInfo'] = relationship(back_populates='awards') + user_info: Mapped["UserInfo"] = relationship(back_populates="awards") class Resume(Base): - __tablename__ = 'resumes' - - id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, server_default=text('uuid_generate_v4()')) - user_id: Mapped[UUID] = mapped_column(ForeignKey('users.id', ondelete='CASCADE'), unique=True, nullable=False) + __tablename__ = "resumes" + + id: Mapped[UUID] = mapped_column( + PG_UUID(as_uuid=True), + primary_key=True, + 
server_default=text("uuid_generate_v4()"), + ) + user_id: Mapped[UUID] = mapped_column( + ForeignKey("users.id", ondelete="CASCADE"), unique=True, nullable=False + ) resume_data: Mapped[str] = mapped_column(Text, nullable=False) created_at: Mapped[datetime] = mapped_column(server_default=func.now()) - last_updated: Mapped[datetime] = mapped_column(server_default=func.now(), onupdate=func.now()) + last_updated: Mapped[datetime] = mapped_column( + server_default=func.now(), onupdate=func.now() + ) - user: Mapped['User'] = relationship(back_populates='resume') \ No newline at end of file + user: Mapped["User"] = relationship(back_populates="resume") diff --git a/backend/interview/refresh.py b/backend/interview/refresh.py index e074947..589191f 100644 --- a/backend/interview/refresh.py +++ b/backend/interview/refresh.py @@ -6,38 +6,39 @@ load_dotenv() # Get Data URL -DATA_URL = os.getenv('DATA_URL') +DATA_URL = os.getenv("DATA_URL") BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.join(BASE_DIR, 'data') +DATA_DIR = os.path.join(BASE_DIR, "data") + def refresh_data(): # Check if repo exists - if not DATA_URL: + if not DATA_URL: print("DATA_URL not found...") return # Check if data exists by checking .git folder - git_folder = os.path.join(DATA_DIR, '.git') - + git_folder = os.path.join(DATA_DIR, ".git") - if os.path.exists(git_folder): + if os.path.exists(git_folder): print(f"Updating DataSet...") try: # Update existing dataset using git command (pull) - subprocess.run(['git', '-C', DATA_DIR, 'pull'], check=True) + subprocess.run(["git", "-C", DATA_DIR, "pull"], check=True) print("Data updated successfully") except subprocess.CalledProcessError as e: print(f"Unable to update data: {e}") - else: + else: print(f"Obtaining DataSet...") try: # Obtain dataset using git command (clone) into DATA_DIR - subprocess.run(['git', 'clone', DATA_URL, DATA_DIR], check=True) + subprocess.run(["git", "clone", DATA_URL, DATA_DIR], check=True) print("Data 
successfully obtained") except subprocess.CalledProcessError as e: print(f"Unable to obtain data: {e}") -if __name__ == '__main__': - refresh_data() \ No newline at end of file + +if __name__ == "__main__": + refresh_data() diff --git a/backend/interview/service.py b/backend/interview/service.py index 8180c47..f92b1de 100644 --- a/backend/interview/service.py +++ b/backend/interview/service.py @@ -1,11 +1,12 @@ from fastapi import FastAPI, HTTPException -import pandas as pd +import pandas as pd import os import glob -# Setup directory for company data +# Setup directory for company data BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.join(BASE_DIR, 'data','company-questions') +DATA_DIR = os.path.join(BASE_DIR, "data", "company-questions") + def get_questions(company: str): # Search for company directory in data @@ -13,13 +14,14 @@ def get_questions(company: str): if not os.path.exists(company_dir): raise HTTPException(404, "Company not found") - + # Obtain list of csv files in company directory using glob - csv_files = glob.glob(os.path.join(company_dir, '*.csv')) + csv_files = glob.glob(os.path.join(company_dir, "*.csv")) if not csv_files: - raise HTTPException(status_code=404, detail="No CSV files found in company directory") - + raise HTTPException( + status_code=404, detail="No CSV files found in company directory" + ) target_file = csv_files[0] @@ -28,7 +30,6 @@ def get_questions(company: str): df = pd.read_csv(target_file) # Convert dataframe to JSON and return - return df.to_dict(orient='records') + return df.to_dict(orient="records") except Exception as e: raise HTTPException(status_code=500, detail=str(e)) - \ No newline at end of file diff --git a/backend/llm/client.py b/backend/llm/client.py index e268cac..8d03a33 100644 --- a/backend/llm/client.py +++ b/backend/llm/client.py @@ -1,9 +1,8 @@ -from groq import Groq # type: ignore +from groq import Groq # type: ignore from dotenv import load_dotenv import json import time - 
load_dotenv() MODEL_FAST = "llama-3.1-8b-instant" @@ -21,7 +20,7 @@ def chat(messages, max_tokens=300, temperature=0.3): model=MODEL_FAST, messages=messages, max_tokens=max_tokens, - temperature=temperature + temperature=temperature, ) return completion.choices[0].message.content @@ -30,7 +29,9 @@ def chat(messages, max_tokens=300, temperature=0.3): return "Error occurred while processing the request." -def generate_json(messages, max_tokens=2000, temperature=0.2, retries=2, use_smart_model=False): +def generate_json( + messages, max_tokens=2000, temperature=0.2, retries=2, use_smart_model=False +): """ Send chat messages to the LLM and parse the JSON output. Retries on JSON parsing errors. @@ -53,9 +54,9 @@ def generate_json(messages, max_tokens=2000, temperature=0.2, retries=2, use_sma messages=messages, max_tokens=max_tokens, temperature=temperature, - response_format={"type": "json_object"} + response_format={"type": "json_object"}, ) - + raw_response = completion.choices[0].message.content return json.loads(raw_response) @@ -65,7 +66,7 @@ def generate_json(messages, max_tokens=2000, temperature=0.2, retries=2, use_sma time.sleep(1) continue return {"error": "Failed to parse JSON from LLM response."} - + except Exception as e: print(f"Error occurred (attempt {attempt + 1}): {e}") if attempt < retries: diff --git a/backend/llm/prompts.py b/backend/llm/prompts.py index 2687295..4b9b0d2 100644 --- a/backend/llm/prompts.py +++ b/backend/llm/prompts.py @@ -146,7 +146,6 @@ Do not include markdown, explanations, or code fences. Return only the JSON object.""" - COVER_LETTER_PROMPT = """You are an expert career writer who specializes in natural, human-sounding cover letters. Given a resume JSON, job description, company name (optional), position title (optional), and hiring manager name (optional), write a tailored cover letter in proper business letter format that feels genuinely written by the candidate — not by AI. 
diff --git a/backend/llm/service.py b/backend/llm/service.py index 9863423..94fb17c 100644 --- a/backend/llm/service.py +++ b/backend/llm/service.py @@ -1,8 +1,13 @@ -from llm.prompts import RESUME_SYSTEM_PROMPT, ATS_OPTIMIZATION_PROMPT, COVER_LETTER_PROMPT +from llm.prompts import ( + RESUME_SYSTEM_PROMPT, + ATS_OPTIMIZATION_PROMPT, + COVER_LETTER_PROMPT, +) from llm.client import generate_json import json from typing import Optional + def generate_resume(resume_dict: dict) -> dict: """ Enhance raw resume data using the llm. @@ -14,14 +19,14 @@ def generate_resume(resume_dict: dict) -> dict: """ messages = [ {"role": "system", "content": RESUME_SYSTEM_PROMPT}, - {"role": "user", "content": json.dumps(resume_dict)} + {"role": "user", "content": json.dumps(resume_dict)}, ] result = generate_json(messages, use_smart_model=False) if "error" in result: return {"error": result["error"], "original": resume_dict} - + return result @@ -32,7 +37,7 @@ def optimize_resume(resume_dict: dict, job_description: str) -> dict: Args: resume_dict (dict): The raw resume data with resume model fields job_description (str): The job description text to optimize against - + Returns: dict: The optimized resume data - same structure as input """ @@ -44,18 +49,24 @@ def optimize_resume(resume_dict: dict, job_description: str) -> dict: messages = [ {"role": "system", "content": ATS_OPTIMIZATION_PROMPT}, - {"role": "user", "content": user_content} + {"role": "user", "content": user_content}, ] result = generate_json(messages, use_smart_model=True) - + if "error" in result: return {"error": result["error"], "original": resume_dict} - + return result -def generate_cover_letter(resume_dict: dict, job_description: str, company_name: Optional[str], position_title: Optional[str], hiring_manager_name: Optional[str]) -> dict: +def generate_cover_letter( + resume_dict: dict, + job_description: str, + company_name: Optional[str], + position_title: Optional[str], + hiring_manager_name: Optional[str], +) 
-> dict: """ Generate a cover letter based on resume data and job description. @@ -80,12 +91,14 @@ def generate_cover_letter(resume_dict: dict, job_description: str, company_name: messages = [ {"role": "system", "content": COVER_LETTER_PROMPT}, - {"role": "user", "content": user_content} + {"role": "user", "content": user_content}, ] - result = generate_json(messages, max_tokens=1000, temperature=0.5, use_smart_model=True) + result = generate_json( + messages, max_tokens=1000, temperature=0.5, use_smart_model=True + ) if "error" in result: return {"error": result["error"]} - return result \ No newline at end of file + return result diff --git a/backend/llm/test_llm.py b/backend/llm/test_llm.py index 7ae762e..772a5f4 100644 --- a/backend/llm/test_llm.py +++ b/backend/llm/test_llm.py @@ -2,8 +2,11 @@ messages = [ {"role": "system", "content": "You are an expert resume writer."}, - {"role": "user", "content": "Write 1 strong bullet point for a backend engineer experience."} + { + "role": "user", + "content": "Write 1 strong bullet point for a backend engineer experience.", + }, ] result = chat(messages) -print("LLM output:", result) \ No newline at end of file +print("LLM output:", result) diff --git a/backend/main.py b/backend/main.py index 35b99d9..4c03307 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,7 +6,13 @@ from llm.client import chat from llm.prompts import RESUME_SYSTEM_PROMPT -from models import ResumeRequest, GenerateResumeRequest, OptimizeResumeRequest, AnalyzeResumeRequest, CoverLetterRequest +from models import ( + ResumeRequest, + GenerateResumeRequest, + OptimizeResumeRequest, + AnalyzeResumeRequest, + CoverLetterRequest, +) from llm.service import generate_resume, optimize_resume, generate_cover_letter from resume_analyzer import ResumeAnalyzer, ValidationError @@ -27,12 +33,13 @@ allow_headers=["*"], ) -@app.on_event('startup') + +@app.on_event("startup") def startup_db(): init_db() -@app.post('/chat') +@app.post("/chat") async def 
chat_endpoint(request: ResumeRequest): """ Endpoint to handle chat requests for resume assistance. @@ -55,8 +62,7 @@ async def chat_endpoint(request: ResumeRequest): return {"response": response} - -@app.post('/llm/generate-resume') +@app.post("/llm/generate-resume") async def generate_resume_endpoint(request: GenerateResumeRequest): """ HTTP route handler. Deals with FastAPI/HTTP specifics @@ -80,31 +86,29 @@ async def generate_resume_endpoint(request: GenerateResumeRequest): return {"resume": enhanced_resume} - -@app.post('/llm/optimize-resume') +@app.post("/llm/optimize-resume") async def optimize_resume_endpoint(request: OptimizeResumeRequest): - """ - Optimize resume for ATS according to job description. - - Args: - request (OptimizeResumeRequest): The request body containing the user's prompt. - Returns: - dict: The LLM's response or an error message. - """ + """ + Optimize resume for ATS according to job description. - resume_dict = request.resume.model_dump() - job_description = request.job_description + Args: + request (OptimizeResumeRequest): The request body containing the user's prompt. + Returns: + dict: The LLM's response or an error message. + """ - optimized_resume = optimize_resume(resume_dict, job_description) + resume_dict = request.resume.model_dump() + job_description = request.job_description - if "error" in optimized_resume: - raise HTTPException(status_code=500, detail=optimized_resume["error"]) + optimized_resume = optimize_resume(resume_dict, job_description) - return {"resume": optimized_resume} + if "error" in optimized_resume: + raise HTTPException(status_code=500, detail=optimized_resume["error"]) + return {"resume": optimized_resume} -@app.post('/analyze-resume') +@app.post("/analyze-resume") async def analyze_resume_endpoint(request: AnalyzeResumeRequest): """ Analyze resume against job description. 
@@ -118,10 +122,9 @@ async def analyze_resume_endpoint(request: AnalyzeResumeRequest): try: resume_dict = request.resume.model_dump() - result = resume_analyzer.analyze({ - "resume": resume_dict, - "job_description": request.job_description - }) + result = resume_analyzer.analyze( + {"resume": resume_dict, "job_description": request.job_description} + ) return result.to_dict() @@ -131,7 +134,7 @@ async def analyze_resume_endpoint(request: AnalyzeResumeRequest): raise HTTPException(status_code=500, detail=str(e)) -@app.post('/llm/generate-cover-letter') +@app.post("/llm/generate-cover-letter") async def generate_cover_letter_endpoint(request: CoverLetterRequest): """ Generate a cover letter based on resume and job description. @@ -148,19 +151,24 @@ async def generate_cover_letter_endpoint(request: CoverLetterRequest): position_title = request.position_title hiring_manager_name = request.hiring_manager_name - result = generate_cover_letter(resume_dict, job_description, company_name, position_title, hiring_manager_name) + result = generate_cover_letter( + resume_dict, job_description, company_name, position_title, hiring_manager_name + ) if "error" in result: raise HTTPException(status_code=500, detail=result["error"]) return result + @app.get("/interview-questions/{company}") def get_interview_questions_endpoint(company: str): data = get_questions(company) if data is None: - raise HTTPException(status_code=404, detail=f"Company '{company}' not found or has no data") + raise HTTPException( + status_code=404, detail=f"Company '{company}' not found or has no data" + ) return data diff --git a/backend/models.py b/backend/models.py index a2d5822..07b83d9 100644 --- a/backend/models.py +++ b/backend/models.py @@ -2,7 +2,6 @@ This module defines Pydantic models for resume-related data structures and requests. 
""" - from typing import Optional from pydantic import BaseModel @@ -10,6 +9,7 @@ class ResumeRequest(BaseModel): messages: list + class Education(BaseModel): school: str | None = None major: str | None = None @@ -18,6 +18,7 @@ class Education(BaseModel): start_year: str | None = None end_year: str | None = None + class Experience(BaseModel): company: str | None = None title: str | None = None @@ -26,6 +27,7 @@ class Experience(BaseModel): start_date: str | None = None end_date: str | None = None + class Project(BaseModel): name: str | None = None description: str | None = None @@ -38,16 +40,19 @@ class Link(BaseModel): type: str | None = None # linkedin, github, portfolio, other url: str | None = None + class Certification(BaseModel): name: str | None = None issuer: str | None = None date: str | None = None + class Award(BaseModel): name: str | None = None issuer: str | None = None date: str | None = None + class Resume(BaseModel): name: str # required phone: str | None = None @@ -63,9 +68,11 @@ class Resume(BaseModel): certifications: list[Certification] | None = None awards: list[Award] | None = None + class GenerateResumeRequest(BaseModel): resume: Resume + class OptimizeResumeRequest(BaseModel): resume: Resume job_description: str @@ -76,7 +83,6 @@ class AnalyzeResumeRequest(BaseModel): job_description: str - class AnalyzeResumeResponse(BaseModel): score: float confidence: str diff --git a/backend/requirements-dev.in b/backend/requirements-dev.in new file mode 100644 index 0000000..4193e97 --- /dev/null +++ b/backend/requirements-dev.in @@ -0,0 +1,2 @@ +black +flake8 \ No newline at end of file diff --git a/backend/requirements-dev.txt b/backend/requirements-dev.txt new file mode 100644 index 0000000..ac8db74 --- /dev/null +++ b/backend/requirements-dev.txt @@ -0,0 +1,28 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --output-file=requirements-dev.txt requirements-dev.in +# +black==26.3.1 + # 
via -r requirements-dev.in +click==8.3.1 + # via black +flake8==7.3.0 + # via -r requirements-dev.in +mccabe==0.7.0 + # via flake8 +mypy-extensions==1.1.0 + # via black +packaging==26.0 + # via black +pathspec==1.0.4 + # via black +platformdirs==4.9.4 + # via black +pycodestyle==2.14.0 + # via flake8 +pyflakes==3.4.0 + # via flake8 +pytokens==0.4.1 + # via black diff --git a/backend/resume_analyzer.py b/backend/resume_analyzer.py index 2210cf9..70bbc44 100644 --- a/backend/resume_analyzer.py +++ b/backend/resume_analyzer.py @@ -6,7 +6,7 @@ Usage: from resume_analyzer import ResumeAnalyzer, AnalysisResult - + analyzer = ResumeAnalyzer() result = analyzer.analyze({ "resume": {...}, @@ -30,6 +30,7 @@ class ConfidenceLevel(Enum): """Confidence assessment levels.""" + HIGH = "HIGH" MEDIUM = "MEDIUM" LOW = "LOW" @@ -37,6 +38,7 @@ class ConfidenceLevel(Enum): class RecommendationLevel(Enum): """Hiring recommendation levels.""" + STRONG = "STRONG" REVIEW = "REVIEW" INTERVIEW = "INTERVIEW" @@ -46,27 +48,32 @@ class RecommendationLevel(Enum): @dataclass class ScoringConfig: """Configurable weights for resume analysis.""" + # Weights calibrated against construction tech hiring decisions - tfidf_weight: float = 0.5 # Content relevance - skills_weight: float = 0.3 # Skill match - experience_weight: float = 0.2 # Experience level + tfidf_weight: float = 0.5 # Content relevance + skills_weight: float = 0.3 # Skill match + experience_weight: float = 0.2 # Experience level # Performance limits max_resume_chars: int = 50_000 max_jd_chars: int = 10_000 - + def validate(self) -> None: """Ensure weights sum to 1.0.""" weight_sum = self.tfidf_weight + self.skills_weight + self.experience_weight if not (0.99 <= weight_sum <= 1.01): raise ValueError(f"Weights must sum to 1.0, got {weight_sum}") - if any(w < 0 or w > 1 for w in [self.tfidf_weight, self.skills_weight, self.experience_weight]): + if any( + w < 0 or w > 1 + for w in [self.tfidf_weight, self.skills_weight, 
self.experience_weight] + ): raise ValueError("Weights must be between 0 and 1") @dataclass class AnalysisResult: """Structured analysis result.""" + score: float confidence: str recommendation: str @@ -79,7 +86,7 @@ class AnalysisResult: suggested_keywords: List[str] jd_skills_count: int resume_name: str - + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" return asdict(self) @@ -87,6 +94,7 @@ def to_dict(self) -> Dict[str, Any]: class ValidationError(ValueError): """Custom exception for validation failures.""" + pass @@ -100,69 +108,78 @@ class ResumeAnalyzer: - Job skills: LLM extraction (smart, handles any job description) - Matching: Simple set intersection (deterministic) """ - + def __init__(self, config: Optional[ScoringConfig] = None): """Initialize analyzer with optional custom config.""" self.config = config or ScoringConfig() self.config.validate() logger.info("ResumeAnalyzer initialized with config: %s", asdict(self.config)) - + def analyze(self, data: Dict[str, Any]) -> AnalysisResult: """ Analyze resume against job description. 
- + Args: data: Dictionary with 'resume' and 'job_description' keys - + Returns: AnalysisResult with comprehensive matching analysis - + Raises: ValidationError: If input doesn't meet requirements """ logger.info("Starting resume analysis") - + # Validate input self._validate_input(data) - resume = data['resume'] - job_desc = data['job_description'] - + resume = data["resume"] + job_desc = data["job_description"] + # Extract and normalize data resume_skills = self._extract_resume_skills(resume) job_skills = self._extract_job_skills(job_desc) - experience_years = self._calculate_experience(resume.get('experience', [])) - - logger.info("Extracted %d resume skills and %d job skills", - len(resume_skills), len(job_skills)) - + experience_years = self._calculate_experience(resume.get("experience", [])) + + logger.info( + "Extracted %d resume skills and %d job skills", + len(resume_skills), + len(job_skills), + ) + # Calculate matches matched_skills = self._match_skills(resume_skills, job_skills) missing_skills = [s for s in job_skills if s not in matched_skills] - + # Score components - tfidf_score = self._calculate_tfidf_score(resume, job_desc, resume_skills, job_skills) + tfidf_score = self._calculate_tfidf_score( + resume, job_desc, resume_skills, job_skills + ) skills_overlap = (len(matched_skills) / max(len(job_skills), 1)) * 100 experience_score = min((experience_years / 8) * 100, 100) - + # Overall score (weighted average) overall_score = ( - tfidf_score * self.config.tfidf_weight + - skills_overlap * self.config.skills_weight + - experience_score * self.config.experience_weight + tfidf_score * self.config.tfidf_weight + + skills_overlap * self.config.skills_weight + + experience_score * self.config.experience_weight ) - + # Confidence level confidence = self._assess_confidence(job_skills) - + # Recommendation recommendation = self._get_recommendation(overall_score, skills_overlap) - + # Suggested keywords (missing skills + top keywords from JD) 
suggested_keywords = missing_skills[:6] - - logger.info("Analysis complete: score=%.1f, confidence=%s, recommendation=%s", - overall_score, confidence, recommendation) - + + logger.info( + "Analysis complete: score=%.1f, confidence=%s, recommendation=%s", + overall_score, + confidence, + recommendation, + ) + result = AnalysisResult( score=round(overall_score, 1), confidence=confidence, @@ -175,41 +192,43 @@ def analyze(self, data: Dict[str, Any]) -> AnalysisResult: missing_skills=missing_skills, suggested_keywords=suggested_keywords, jd_skills_count=len(job_skills), - resume_name=resume.get('name', 'Unknown') + resume_name=resume.get("name", "Unknown"), ) - + return result - + def _validate_input(self, data: Dict[str, Any]) -> None: """Validate input structure.""" if not isinstance(data, dict): raise ValidationError(f"Input must be dict, got {type(data).__name__}") - - if 'resume' not in data or 'job_description' not in data: + + if "resume" not in data or "job_description" not in data: raise ValidationError("Input must have 'resume' and 'job_description' keys") - - resume = data['resume'] + + resume = data["resume"] if not isinstance(resume, dict): raise ValidationError(f"Resume must be dict, got {type(resume).__name__}") - - if not resume.get('name'): + + if not resume.get("name"): raise ValidationError("Resume must have 'name' field") - - if not resume.get('skills'): + + if not resume.get("skills"): raise ValidationError("Resume must have 'skills' field") - - if not isinstance(resume['skills'], list): + + if not isinstance(resume["skills"], list): raise ValidationError("Resume 'skills' must be list") - - job_desc = data['job_description'] + + job_desc = data["job_description"] if not isinstance(job_desc, str): - raise ValidationError(f"Job description must be string, got {type(job_desc).__name__}") - + raise ValidationError( + f"Job description must be string, got {type(job_desc).__name__}" + ) + if len(job_desc.strip()) < 10: raise ValidationError("Job 
description too short (minimum 10 characters)") - - logger.debug("Input validation passed for resume '%s'", resume.get('name')) - + + logger.debug("Input validation passed for resume '%s'", resume.get("name")) + def _extract_resume_skills(self, resume: Dict[str, Any]) -> Set[str]: """ Extract skills from resume using explicit skills list. @@ -218,7 +237,7 @@ def _extract_resume_skills(self, resume: Dict[str, Any]) -> Set[str]: skills = set() # Use explicit skills list (user-provided, deterministic) - for skill in resume.get('skills', []): + for skill in resume.get("skills", []): if isinstance(skill, str) and skill.strip(): # Normalize: lowercase and strip whitespace normalized = skill.lower().strip() @@ -226,7 +245,7 @@ def _extract_resume_skills(self, resume: Dict[str, Any]) -> Set[str]: logger.debug("Extracted %d resume skills from explicit list", len(skills)) return skills - + def _extract_job_skills(self, job_desc: str) -> Set[str]: """ Extract skills from job description using LLM. 
@@ -235,7 +254,7 @@ def _extract_job_skills(self, job_desc: str) -> Set[str]: try: messages = [ {"role": "system", "content": SKILL_EXTRACTION_PROMPT}, - {"role": "user", "content": job_desc} + {"role": "user", "content": job_desc}, ] result = generate_json(messages, max_tokens=1000, use_smart_model=False) @@ -245,7 +264,11 @@ def _extract_job_skills(self, job_desc: str) -> Set[str]: return set() skills = result.get("skills", []) - skill_set = {skill.lower().strip() for skill in skills if isinstance(skill, str) and skill.strip()} + skill_set = { + skill.lower().strip() + for skill in skills + if isinstance(skill, str) and skill.strip() + } logger.debug("Extracted %d job skills via LLM", len(skill_set)) return skill_set @@ -253,8 +276,7 @@ def _extract_job_skills(self, job_desc: str) -> Set[str]: except Exception as e: logger.error("Failed to extract job skills: %s", e) return set() - - + def _match_skills(self, resume_skills: Set[str], job_skills: Set[str]) -> Set[str]: """Match resume skills to job skills.""" matched = set() @@ -262,60 +284,73 @@ def _match_skills(self, resume_skills: Set[str], job_skills: Set[str]) -> Set[st if skill in job_skills: matched.add(skill) return matched - - def _calculate_tfidf_score(self, resume: Dict[str, Any], job_desc: str, - resume_skills: Set[str], job_skills: Set[str]) -> float: + + def _calculate_tfidf_score( + self, + resume: Dict[str, Any], + job_desc: str, + resume_skills: Set[str], + job_skills: Set[str], + ) -> float: """ Calculate TF-IDF-inspired score based on skill overlap and keyword density in job description relative to resume. 
""" if not job_skills: return 0.0 - + # Simple TF-IDF: proportion of skills that appear in both - overlap_ratio = len(resume_skills & job_skills) / len(job_skills | resume_skills) - + overlap_ratio = len(resume_skills & job_skills) / len( + job_skills | resume_skills + ) + # Bonus for keyword density in JD total_keywords = len(job_skills) - skill_density = total_keywords / (len(job_desc.split()) + 1) # +1 to avoid division by zero - + skill_density = total_keywords / ( + len(job_desc.split()) + 1 + ) # +1 to avoid division by zero + # Weighted score tfidf_score = (overlap_ratio * 70) + (skill_density * 30) return min(tfidf_score, 100) # Cap at 100 - + def _calculate_experience(self, experience: List[Dict[str, Any]]) -> int: """Calculate total experience in years.""" if not experience: return 0 - + total_years = 0 for exp in experience: if not isinstance(exp, dict): continue - - start_date = exp.get('start_date', '') - end_date = exp.get('end_date', '') - + + start_date = exp.get("start_date", "") + end_date = exp.get("end_date", "") + # Extract year from date string current_year = datetime.now().year start_year = self._extract_year(start_date) or current_year - end_year = current_year if 'present' in end_date.lower() else (self._extract_year(end_date) or current_year) - + end_year = ( + current_year + if "present" in end_date.lower() + else (self._extract_year(end_date) or current_year) + ) + years = max(0, end_year - start_year) total_years += years - + logger.debug("Calculated experience: %d years", total_years) return total_years - + def _extract_year(self, date_str: str) -> Optional[int]: """Extract year from date string.""" if not date_str: return None - + # Look for 4-digit year - match = re.search(r'\b(19|20)\d{2}\b', date_str) + match = re.search(r"\b(19|20)\d{2}\b", date_str) return int(match.group(0)) if match else None - + def _assess_confidence(self, job_skills: Set[str]) -> str: """Assess confidence level based on job description clarity.""" if 
len(job_skills) > 10: @@ -324,7 +359,7 @@ def _assess_confidence(self, job_skills: Set[str]) -> str: return ConfidenceLevel.MEDIUM.value else: return ConfidenceLevel.LOW.value - + def _get_recommendation(self, score: float, skills_overlap: float) -> str: """Get hiring recommendation based on scores.""" if score >= 75 and skills_overlap >= 70: @@ -338,14 +373,16 @@ def _get_recommendation(self, score: float, skills_overlap: float) -> str: # Module-level convenience function -def analyze_resume(data: Dict[str, Any], config: Optional[ScoringConfig] = None) -> AnalysisResult: +def analyze_resume( + data: Dict[str, Any], config: Optional[ScoringConfig] = None +) -> AnalysisResult: """ Convenience function to analyze a resume. - + Args: data: Dictionary with 'resume' and 'job_description' keys config: Optional ScoringConfig - + Returns: AnalysisResult """ @@ -356,7 +393,7 @@ def analyze_resume(data: Dict[str, Any], config: Optional[ScoringConfig] = None) if __name__ == "__main__": # Example usage logging.basicConfig(level=logging.INFO) - + example_data = { "resume": { "name": "Sarah Chen", @@ -367,13 +404,13 @@ def analyze_resume(data: Dict[str, Any], config: Optional[ScoringConfig] = None) "title": "Senior Engineer", "start_date": "June 2022", "end_date": "Present", - "description": "Led microservices with Python and AWS" + "description": "Led microservices with Python and AWS", } - ] + ], }, - "job_description": "Staff Backend Engineer: 5+ years Python, Go, PostgreSQL, Redis, Kubernetes, AWS/GCP. Event-driven architecture, Kafka, gRPC, Datadog." + "job_description": "Staff Backend Engineer: 5+ years Python, Go, PostgreSQL, Redis, Kubernetes, AWS/GCP. Event-driven architecture, Kafka, gRPC, Datadog.", } - + analyzer = ResumeAnalyzer() result = analyzer.analyze(example_data) print(json.dumps(result.to_dict(), indent=2))