Sasisundar2211 · Copilot · Feb 28, 2026 · Feb 28, 2026
diff --git a/generate_model.py b/generate_model.py
@@ -3,6 +3,7 @@
 from src.data_preprocessing import preprocess_data
 from src.feature_engineering import engineer_features
 from src.clustering import cluster_users
+from src.profiling import profile_segments
 import os
 
 # Load dataset
@@ -20,8 +21,10 @@
 # Perform clustering and save model
 labels = cluster_users(X, k=4)
 
+# Assign segments and build profiles; the call's return value is not used
+profile_segments(df, labels)  # NOTE(review): presumably writes df['segment'] in place (replacing the assignment removed below) - confirm
+
 # Save segmented output
-df['segment'] = labels
 os.makedirs("outputs", exist_ok=True)
 df.to_csv("outputs/segmented_users.csv", index=False)
 

diff --git a/src/constants.py b/src/constants.py
@@ -0,0 +1,24 @@
+TIME_WEEKDAY_COL = 'Time Spent Online (hrs/weekday)'  # read by engineer_features
+TIME_WEEKEND_COL = 'Time Spent Online (hrs/weekend)'  # read by engineer_features
+CTR_COL = 'Click-Through Rates (CTR)'  # read by engineer_features
+CONVERSION_COL = 'Conversion Rates'  # read by engineer_features
+ENGAGEMENT_SCORE_COL = 'engagement_score'  # written by engineer_features: (weekday + weekend) / 2
+AD_RESPONSIVENESS_COL = 'ad_responsiveness'  # written by engineer_features: CTR * conversion rate
+
+NUMERICAL_COLS = [  # columns preprocess_data hands to StandardScaler
+    TIME_WEEKDAY_COL,
+    TIME_WEEKEND_COL,
+    CTR_COL,
+    CONVERSION_COL,
+    'Ad Interaction Time (sec)',  # only entry without a named constant in this module
+    ENGAGEMENT_SCORE_COL,
+    AD_RESPONSIVENESS_COL,
+]
+
+CATEGORICAL_COLS = [  # columns preprocess_data hands to OneHotEncoder(handle_unknown="ignore")
+    'Age',
+    'Gender',
+    'Income Level',
+    'Education Level',
+    'Device Usage',
+]
diff --git a/src/data_preprocessing.py b/src/data_preprocessing.py
@@ -1,29 +1,12 @@
 import pandas as pd
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
+from src.constants import NUMERICAL_COLS, CATEGORICAL_COLS
 
 def preprocess_data(df):
-    numerical_cols = [
-        'Time Spent Online (hrs/weekday)',
-        'Time Spent Online (hrs/weekend)',
-        'Click-Through Rates (CTR)',
-        'Conversion Rates',
-        'Ad Interaction Time (sec)',
-        'engagement_score',
-        'ad_responsiveness'
-    ]
-
-    categorical_cols = [
-        'Age',               # ← categorical (e.g., '25-34')
-        'Gender',
-        'Income Level',      # ← also likely a string like 'High'
-        'Education Level',
-        'Device Usage'
-    ]
-
     preprocessor = ColumnTransformer([
-        ('num', StandardScaler(), numerical_cols),
-        ('cat', OneHotEncoder(handle_unknown="ignore"), categorical_cols)
+        ('num', StandardScaler(), NUMERICAL_COLS),
+        ('cat', OneHotEncoder(handle_unknown="ignore"), CATEGORICAL_COLS)
     ])
 
     X = preprocessor.fit_transform(df)

diff --git a/src/feature_engineering.py b/src/feature_engineering.py
@@ -1,6 +1,15 @@
+from src.constants import (
+    TIME_WEEKDAY_COL,
+    TIME_WEEKEND_COL,
+    CTR_COL,
+    CONVERSION_COL,
+    ENGAGEMENT_SCORE_COL,
+    AD_RESPONSIVENESS_COL,
+)
+
 def engineer_features(df):
-    df['engagement_score'] = (
-        df['Time Spent Online (hrs/weekday)'] + df['Time Spent Online (hrs/weekend)']
+    df[ENGAGEMENT_SCORE_COL] = (  # average of the weekday and weekend time-online columns
+        df[TIME_WEEKDAY_COL] + df[TIME_WEEKEND_COL]
     ) / 2
-    df['ad_responsiveness'] = df['Click-Through Rates (CTR)'] * df['Conversion Rates']
+    df[AD_RESPONSIVENESS_COL] = df[CTR_COL] * df[CONVERSION_COL]  # CTR times conversion rate; both new columns are assigned onto the df that was passed in, which is then returned
     return df