Student_alumni/app.py at main · Samartho7/Student_alumni · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from flask import Flask, request, jsonify
from flask_cors import CORS
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Flask application object; the /match route further down is registered on it.
app = Flask(__name__)
# Enable CORS for the /match endpoint only, and only for the single origin
# http://localhost:5173.
# NOTE(review): that origin looks like a local dev front-end — confirm it is
# updated/configurable before deploying anywhere else.
CORS(app, resources={r"/match": {"origins": "http://localhost:5173"}})
# Instantiate the sentence-embedding model once at import time so that every
# model.encode() call in this module reuses the same object.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def convert_numpy_types(obj):
    """Recursively replace NumPy values with plain, JSON-serialisable ones.

    Conversions applied:

    * ``np.ndarray``            -> nested Python list (``ndarray.tolist()``)
    * NumPy float scalar        -> ``int``, rounded to the nearest integer
      (scores are reported as whole-number percentages)
    * NumPy integer scalar      -> ``int``
    * ``np.bool_``              -> ``bool``
    * ``list`` / ``tuple`` / ``dict`` -> same container type with every
      element / value converted recursively (dict keys are left as-is)

    Anything else, including built-in Python ``float`` values, is returned
    unchanged.

    Args:
        obj: Arbitrary value, possibly nested, that may contain NumPy objects.

    Returns:
        An equivalent structure with the NumPy values replaced as above.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    # np.floating / np.integer are the abstract bases of every NumPy float /
    # integer scalar, so all widths (float16, int8, uint64, ...) are covered,
    # not only the 32- and 64-bit ones.
    if isinstance(obj, np.floating):
        return int(round(obj))
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_numpy_types(item) for item in obj)
    if isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    return obj

# Sample student profiles, hard-coded in this module (nothing is read from
# disk). Every profile carries "name", "skills", "interests" and "location".
# Note: the /match handler in this file scores the students sent in the
# request body, not this list.
students = [
    {
        "name": "Aarav Gupta",
        "skills": ["python", "machine learning"],
        "interests": ["AI", "data science"],
        "location": "NY",
    },
    {
        "name": "Ishita Mehra",
        "skills": ["javascript", "web development"],
        "interests": ["frontend", "UX"],
        "location": "CA",
    },
    {
        "name": "Rohit Sharma",
        "skills": ["C++", "Game design"],
        "interests": ["Game development", "ML"],
        "location": "INDIA",
    },
    {
        "name": "Neha Patel",
        "skills": ["java", "deep learning"],
        "interests": ["data science", "AI"],
        "location": "NZ",
    },
    {
        "name": "Karan Singh",
        "skills": ["python", "Machine learning"],
        "interests": ["AI", "NLP"],
        "location": "EUROPE",
    },
    {
        "name": "Anjali Rao",
        "skills": ["Python", "Data Analysis", "Machine Learning"],
        "interests": ["Artificial Intelligence", "Data Science"],
        "location": "Mumbai, India",
    },
    {
        "name": "Rahul Verma",
        "skills": ["Java", "Spring Framework", "Microservices"],
        "interests": ["Web Development", "Cloud Computing"],
        "location": "Bangalore, India",
    },
    {
        "name": "Priya Desai",
        "skills": ["JavaScript", "React", "Node.js"],
        "interests": ["Frontend Development", "UI/UX Design"],
        "location": "Pune, India",
    },
    {
        "name": "Sahil Jain",
        "skills": ["C++", "Game Development", "Unity"],
        "interests": ["Gaming", "Game Design"],
        "location": "Delhi, India",
    },
    {
        "name": "Sneha Kapoor",
        "skills": ["HTML", "CSS", "Web Design"],
        "interests": ["Graphic Design", "Digital Marketing"],
        "location": "Chennai, India",
    },
]
# Alumni profiles that students are matched against, hard-coded in this
# module. Every profile carries "name", "skills", "interests" and "location";
# the optional "mentorship" flag is present (as True) only on some entries.
alumni = [
    {
        "name": "John Doe",
        "skills": ["python", "data science"],
        "interests": ["AI", "deep learning"],
        "location": "NY",
        "mentorship": True,
    },
    {
        "name": "Jane Smith",
        "skills": ["javascript", "React"],
        "interests": ["web development", "frontend"],
        "location": "CA",
        "mentorship": True,
    },
    {
        "name": "Robert Brown",
        "skills": ["C++", "Game design"],
        "interests": ["Game development", "ML"],
        "location": "TX",
    },
    {
        "name": "Lisa Johnson",
        "skills": ["data science", "deep learning"],
        "interests": ["AI", "ML"],
        "location": "FL",
    },
    {
        "name": "Michael Green",
        "skills": ["java", "deep learning"],
        "interests": ["AI", "NLP"],
        "location": "IL",
    },
    {
        "name": "Emily White",
        "skills": ["Python", "Data Visualization", "Machine Learning"],
        "interests": ["Data Science", "Artificial Intelligence"],
        "location": "Seattle, WA",
        "mentorship": True,
    },
    {
        "name": "William Black",
        "skills": ["Java", "Android Development", "Spring Boot"],
        "interests": ["Mobile App Development", "Cloud Computing"],
        "location": "Austin, TX",
    },
    {
        "name": "Jessica Blue",
        "skills": ["JavaScript", "React", "Node.js"],
        "interests": ["Frontend Development", "Web Applications"],
        "location": "San Francisco, CA",
    },
    {
        "name": "Chris Gray",
        "skills": ["C#", ".NET", "Web Development"],
        "interests": ["Software Engineering", "Cloud Solutions"],
        "location": "New York, NY",
        "mentorship": True,
    },
    {
        "name": "Sophia Yellow",
        "skills": ["HTML", "CSS", "UI/UX Design"],
        "interests": ["Graphic Design", "User Experience"],
        "location": "Los Angeles, CA",
    },
    {
        "name": "Ethan Green",
        "skills": ["C++", "Game Development", "Unity"],
        "interests": ["Gaming", "Interactive Media"],
        "location": "Chicago, IL",
    },
    {
        "name": "Olivia Blue",
        "skills": ["PHP", "MySQL", "Web Development"],
        "interests": ["Full Stack Development", "Digital Marketing"],
        "location": "Miami, FL",
        "mentorship": True,
    },
]

def get_embedding(text):
    """Encode ``text`` with the module-level sentence-transformer ``model``.

    Args:
        text: The string to embed (callers in this file pass space-joined
            skill or interest lists).

    Returns:
        Whatever ``model.encode`` returns for ``text``.
    """
    embedding = model.encode(text)
    return embedding

def vectorize_profiles(profiles):
    """Embed the skills and interests of every profile.

    Each profile's ``'skills'`` and ``'interests'`` lists are joined with
    single spaces and embedded with ``get_embedding``.

    Args:
        profiles: Iterable of dicts, each with ``'skills'`` and
            ``'interests'`` keys holding lists of strings.

    Returns:
        A ``(skill_embeddings, interest_embeddings)`` pair of NumPy arrays
        whose rows follow the order of ``profiles``.
    """
    embedded_pairs = []
    for entry in profiles:
        # Build both texts first, then embed skills before interests.
        texts = (" ".join(entry['skills']), " ".join(entry['interests']))
        embedded_pairs.append(tuple(get_embedding(text) for text in texts))

    skills_matrix = np.array([pair[0] for pair in embedded_pairs])
    interests_matrix = np.array([pair[1] for pair in embedded_pairs])
    return skills_matrix, interests_matrix

def calculate_weighted_similarity(student, alum, skill_vectors, interest_vectors, alumni, skill_weight=0.7, interest_weight=0.3):
    """Score how well ``alum`` matches ``student`` as a percentage in [0, 100].

    The score is the weighted sum of two cosine similarities (skills and
    interests), plus a flat 0.1 bonus when the locations are equal and another
    0.1 when the alum has a truthy ``'mentorship'`` flag. The result is scaled
    to a percentage, clamped to the 0-100 range and rounded to two decimals.

    Args:
        student: Dict with ``'skills'``, ``'interests'`` (lists of strings)
            and ``'location'``.
        alum: One of the dicts in ``alumni``; needs ``'location'`` and may
            carry ``'mentorship'``.
        skill_vectors: Precomputed skill embeddings, indexed like ``alumni``.
        interest_vectors: Precomputed interest embeddings, indexed like
            ``alumni``.
        alumni: The list ``alum`` is taken from; used to find its row index.
        skill_weight: Weight of the skill similarity.
        interest_weight: Weight of the interest similarity.

    Returns:
        The match score, between 0 and 100 inclusive.

    Raises:
        ValueError: If ``alum`` is not an element of ``alumni``.
        KeyError: If ``student`` or ``alum`` lacks a required key.
    """
    student_skill_vector = get_embedding(" ".join(student['skills']))
    student_interest_vector = get_embedding(" ".join(student['interests']))

    # Row of the precomputed vectors belonging to this alum. list.index
    # compares by equality, so identical profiles resolve to the first one.
    alum_index = alumni.index(alum)

    skill_similarity = cosine_similarity([student_skill_vector], [skill_vectors[alum_index]])[0][0]
    interest_similarity = cosine_similarity([student_interest_vector], [interest_vectors[alum_index]])[0][0]

    weighted_similarity = (skill_similarity * skill_weight) + (interest_similarity * interest_weight)

    # Location and mentorship boosts
    if student['location'] == alum['location']:
        weighted_similarity += 0.1
    if alum.get('mentorship', False):
        weighted_similarity += 0.1

    # Cosine similarity can be negative and the boosts can push the total
    # above 1, so clamp on both sides to keep the result a valid percentage.
    percentage = min(weighted_similarity * 100, 100)
    return round(max(percentage, 0), 2)

def match_students_to_alumni(students, alumni, skill_vectors, interest_vectors):
    """Return the five best-scoring alumni for every student.

    Args:
        students: Iterable of student profile dicts (each needs ``'name'``).
        alumni: List of alumni profile dicts (each needs ``'name'``).
        skill_vectors: Skill embeddings indexed like ``alumni``.
        interest_vectors: Interest embeddings indexed like ``alumni``.

    Returns:
        Dict mapping each student's name to a list of at most five
        ``{'alumni': <name>, 'score': <score>}`` dicts, highest score first.
        Students sharing a name overwrite one another; the last one wins.
    """
    def top_five_for(learner):
        ranked = []
        for mentor in alumni:
            rating = calculate_weighted_similarity(learner, mentor, skill_vectors, interest_vectors, alumni)
            ranked.append({'alumni': mentor['name'], 'score': rating})
        # Stable descending sort: ties keep the order of ``alumni``.
        ranked.sort(key=lambda entry: entry['score'], reverse=True)
        return ranked[:5]

    best_by_student = {}
    for learner in students:
        best_by_student[learner['name']] = top_five_for(learner)
    return best_by_student

@app.route('/match', methods=['POST'])
def match():
    """Match the students in the request body against the built-in alumni.

    Expects a JSON object of the form ``{"students": [<profile>, ...]}`` where
    every profile is an object with ``name``, ``skills``, ``interests`` and
    ``location``.

    Returns:
        200 with a JSON object mapping each student name to its top alumni
        matches, or 400 with ``{"error": ...}`` when the body is missing, is
        not a JSON object, has no ``students`` key, or contains a malformed
        student entry.
    """
    # silent=True yields None for a missing or unparsable JSON body, so the
    # request is answered with the 400 below rather than an unhandled error.
    data = request.get_json(silent=True)
    app.logger.debug("Data received: %s", data)

    if not isinstance(data, dict) or 'students' not in data:
        return jsonify({'error': 'No students data provided'}), 400

    # Reject malformed profiles up front; otherwise they surface as
    # KeyError/TypeError (HTTP 500) deep inside the scoring code.
    submitted_students = data['students']
    required_fields = ('name', 'skills', 'interests', 'location')
    well_formed = isinstance(submitted_students, list) and all(
        isinstance(profile, dict) and all(field in profile for field in required_fields)
        for profile in submitted_students
    )
    if not well_formed:
        return jsonify({'error': 'Each student must be an object with name, skills, interests and location'}), 400

    # Embed the module-level alumni list, then score every submitted student.
    skill_vectors, interest_vectors = vectorize_profiles(alumni)
    results = match_students_to_alumni(submitted_students, alumni, skill_vectors, interest_vectors)
    # Scores may be NumPy scalars, which jsonify cannot serialise directly.
    results = convert_numpy_types(results)
    app.logger.debug("Matching results: %s", results)
    return jsonify(results)

# Start Flask's built-in server only when this file is executed directly
# (not when it is imported as a module).
if __name__ == '__main__':
    # NOTE(review): debug=True is intended for local development — confirm
    # this entry point is never used to serve production traffic.
    app.run(debug=True)