-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredict.py
More file actions
126 lines (112 loc) · 4.13 KB
/
predict.py
File metadata and controls
126 lines (112 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# predict.py — replika preprocessing Streamlit (1:1)
import numpy as np
import pandas as pd
import joblib
# ---- load artifacts ----
SCALER = joblib.load("scaler.pkl")
MODEL = joblib.load("model.pkl")
# ---- label set yg dipakai di Streamlit ----
STREAMLIT_EDU = ["Bachelor (S1)", "Bachelor (Univ Top Dunia ex:MIT,Oxford)", "Master", "PhD"]
STREAMLIT_RS = ["Agresif (HR Ganas)", "Moderat", "Pasif (Stecu)"]
STREAMLIT_EXP = ["Junior", "Mid", "Senior"]
# ---- mapper fleksibel: terima enum dari DB-mu JUGA label Streamlit ----
def norm_edu(x: str) -> str:
x = (x or "").strip()
# enum DB -> label streamlit setara
if x.upper() == "S1": return STREAMLIT_EDU[0]
if x.upper() == "S2": return STREAMLIT_EDU[2]
if x.upper() == "S3": return STREAMLIT_EDU[3]
if x.upper() == "SMA": # tidak ada di training → treat as S1 (paling mendekati/ netral)
return STREAMLIT_EDU[0]
# kalau sudah pakai label streamlit, kembalikan apa adanya
if x in STREAMLIT_EDU: return x
# fallback netral
return STREAMLIT_EDU[0]
def norm_rs(x: str) -> str:
x = (x or "").strip()
if x.lower().startswith("agresif"): return STREAMLIT_RS[0]
if x.lower().startswith("moderat"): return STREAMLIT_RS[1]
if x.lower().startswith("pasif"): return STREAMLIT_RS[2]
if x in STREAMLIT_RS: return x
return STREAMLIT_RS[1]
def norm_exp(x: str) -> str:
x = (x or "").strip().capitalize()
if x not in STREAMLIT_EXP: return "Junior"
return x
# mapping integer seperti di Streamlit
edu_to_int = {
STREAMLIT_EDU[0]: 1, # Bachelor (S1)
STREAMLIT_EDU[1]: 2, # Bachelor Top Dunia
STREAMLIT_EDU[2]: 3, # Master
STREAMLIT_EDU[3]: 4, # PhD
}
rs_to_int = {
STREAMLIT_RS[0]: 1, # Agresif (HR Ganas)
STREAMLIT_RS[1]: 2, # Modorat
STREAMLIT_RS[2]: 3, # Pasif (Stecu)
}
FINAL_COLS = [
"InterviewScore", "SkillScore", "PersonalityScore",
"EducationLevel_2", "EducationLevel_3", "EducationLevel_4",
"RecruitmentStrategy_2", "RecruitmentStrategy_3",
"ExperienceLevel_Mid", "ExperienceLevel_Senior",
]
def build_features(
interview_score: float,
skill_score: float,
personality_score: float,
education_level: str,
recruitment_strategy: str,
experience_level: str,
) -> pd.DataFrame:
# 1) scaling 3 skor (persis Streamlit)
X_num = np.array([[interview_score, skill_score, personality_score]], dtype=float)
scaled = SCALER.transform(X_num) # -> [s_interview, s_skill, s_personality]
s_interview, s_skill, s_personality = scaled[0, 0], scaled[0, 1], scaled[0, 2]
# 2) OHE manual
ohe_cols = {
"EducationLevel_2": 0,
"EducationLevel_3": 0,
"EducationLevel_4": 0,
"RecruitmentStrategy_2": 0,
"RecruitmentStrategy_3": 0,
"ExperienceLevel_Mid": 0,
"ExperienceLevel_Senior": 0,
}
edu_label = norm_edu(education_level)
edu_code = edu_to_int[edu_label]
if edu_code >= 2:
ohe_cols[f"EducationLevel_{edu_code}"] = 1
rs_label = norm_rs(recruitment_strategy)
rs_code = rs_to_int[rs_label]
if rs_code >= 2:
ohe_cols[f"RecruitmentStrategy_{rs_code}"] = 1
exp_label = norm_exp(experience_level)
if exp_label == "Mid":
ohe_cols["ExperienceLevel_Mid"] = 1
elif exp_label == "Senior":
ohe_cols["ExperienceLevel_Senior"] = 1
row = {
"InterviewScore": s_interview,
"SkillScore": s_skill,
"PersonalityScore": s_personality,
**ohe_cols
}
df = pd.DataFrame([row], columns=FINAL_COLS)
return df
def predict_score_and_proba(payload: dict) -> tuple[int, float | None]:
df_features = build_features(
payload["interview_score"],
payload["skill_score"],
payload["personality_score"],
payload["education_level"],
payload["recruitment_strategy"],
payload["experience_level"],
)
y_pred = MODEL.predict(df_features)[0]
proba = None
if hasattr(MODEL, "predict_proba"):
proba = float(MODEL.predict_proba(df_features)[0][1])
# convert proba ke 0..100 (jika ada)
ai_score = int(round((proba * 100))) if proba is not None else int(y_pred) * 100
return ai_score, proba