-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinkedin_main.py
More file actions
92 lines (76 loc) · 3.17 KB
/
linkedin_main.py
File metadata and controls
92 lines (76 loc) · 3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import sqlite3
import requests
from bs4 import BeautifulSoup
def get_or_insert(cursor, table, column, value):
    """Return the ``id`` of the row in *table* whose *column* equals *value*.

    If no matching row exists, a new row is inserted and its row id is
    returned instead. The caller is responsible for committing.

    Args:
        cursor: DB-API cursor using ``?`` placeholders (e.g. ``sqlite3``).
        table: Name of the table to search; it must expose an ``id`` column.
        column: Name of the column compared against *value*.
        value: Value to look up or insert; always bound as a parameter.

    Returns:
        The ``id`` of the existing row, or ``cursor.lastrowid`` for the row
        that was just inserted.

    Raises:
        ValueError: If *table* or *column* is not a plain identifier.
    """
    # Identifiers cannot be bound as SQL parameters, so they end up in the
    # statement text. Reject anything that is not a bare name so a crafted
    # table/column string cannot inject SQL.
    for identifier in (table, column):
        if not (isinstance(identifier, str) and identifier.isidentifier()):
            raise ValueError(f"Invalid SQL identifier: {identifier!r}")

    cursor.execute(f"SELECT id FROM {table} WHERE {column} = ?", (value,))
    row = cursor.fetchone()
    if row is not None:
        return row[0]

    cursor.execute(f"INSERT INTO {table} ({column}) VALUES (?)", (value,))
    return cursor.lastrowid
def _parse_job_card(job):
    """Return ``(title, company, location, link)`` for one job card, or ``None`` if any part is missing."""
    title_tag = job.find('h3', class_='base-search-card__title')
    company_tag = job.find('h4', class_='base-search-card__subtitle')
    location_tag = job.find('span', class_='job-search-card__location')
    link_tag = job.find('a', class_='base-card__full-link')
    # Compare against None explicitly rather than relying on tag truthiness.
    if any(tag is None for tag in (title_tag, company_tag, location_tag, link_tag)):
        return None
    link = link_tag.get('href')
    if link is None:
        return None
    return (
        title_tag.text.strip(),
        company_tag.text.strip(),
        location_tag.text.strip(),
        link,
    )


def linkedin_scraper(webpage, conn, cursor, jobs_to_fetch=25):
    """Scrape paginated job-search results and store them in the database.

    Each page URL is built by appending the result offset
    (``page_number * 25``) to *webpage*. Every job card found on a page is
    split into title, company, location and apply link; the first three are
    normalised through ``get_or_insert`` and a row is added to ``jobs``.
    Work is committed once per page.

    A page that fails (network error, HTTP error status, database error) is
    reported with ``print``, its partial inserts are rolled back, and the
    scraper moves on to the next page. Cards missing any expected element
    are skipped instead of aborting the page.

    Args:
        webpage: Base URL ending in the ``start=`` query parameter; the
            numeric offset is appended to it.
        conn: Open database connection, used for ``commit``/``rollback``.
        cursor: Cursor on *conn* used for all statements.
        jobs_to_fetch: Maximum number of postings to store. Pages are
            requested in steps of 25 until this many jobs are saved or the
            computed number of pages is exhausted.

    Returns:
        None.
    """
    rows_to_fetch = 25  # page size; also the step of the ``start`` offset
    max_pages = (jobs_to_fetch + rows_to_fetch - 1) // rows_to_fetch
    saved = 0

    # A ``for`` loop always advances to the next page, so a page that keeps
    # failing can never be retried forever.
    for page_number in range(max_pages):
        if saved >= jobs_to_fetch:
            break
        next_page = webpage + str(page_number * rows_to_fetch)
        print(f"Scraping: {next_page}")
        try:
            # Bound the wait so a stalled connection cannot hang the run,
            # and treat HTTP error statuses as failures of this page.
            response = requests.get(next_page, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            # NOTE(review): this passes the card's full class string; it
            # presumably stops matching if the site adds or reorders
            # classes - confirm against the Beautiful Soup docs.
            jobs = soup.find_all('div', class_='base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card')

            page_saved = 0
            for job in jobs:
                if saved + page_saved >= jobs_to_fetch:
                    break
                parsed = _parse_job_card(job)
                if parsed is None:
                    continue  # incomplete card: skip it, keep the rest
                title, company, location, link = parsed

                title_id = get_or_insert(cursor, 'titles', 'name', title)
                company_id = get_or_insert(cursor, 'companies', 'name', company)
                location_id = get_or_insert(cursor, 'locations', 'name', location)
                cursor.execute('''
                    INSERT INTO jobs (title_id, company_id, location_id, apply_link)
                    VALUES (?, ?, ?, ?)
                ''', (title_id, company_id, location_id, link))
                page_saved += 1

            conn.commit()
            # Count the page only after its commit succeeded.
            saved += page_saved
        except Exception as e:
            # Deliberate best-effort boundary: report, discard this page's
            # partial inserts, and continue with the next page.
            print(f"Error occurred: {e}")
            conn.rollback()
def main():
    """Create the job database schema if needed and run one scrape.

    Opens ``linkedin-jobs.db`` (relative to the current working directory),
    ensures the ``titles``, ``companies``, ``locations`` and ``jobs`` tables
    exist, then calls ``linkedin_scraper`` for 25 postings. The connection
    is closed even if schema creation or scraping raises.
    """
    conn = sqlite3.connect('linkedin-jobs.db')
    try:
        cursor = conn.cursor()

        # The three lookup tables share the same shape: a unique name per id.
        for table in ('titles', 'companies', 'locations'):
            cursor.execute(f'''
                CREATE TABLE IF NOT EXISTS {table} (
                    id INTEGER PRIMARY KEY,
                    name TEXT UNIQUE
                )
            ''')

        # Each job row references one entry in each lookup table.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS jobs (
                job_id INTEGER PRIMARY KEY AUTOINCREMENT,
                title_id INTEGER,
                company_id INTEGER,
                location_id INTEGER,
                apply_link TEXT,
                FOREIGN KEY (title_id) REFERENCES titles (id),
                FOREIGN KEY (company_id) REFERENCES companies (id),
                FOREIGN KEY (location_id) REFERENCES locations (id)
            )
        ''')
        conn.commit()

        jobs_to_fetch = 25
        linkedin_scraper('https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search?keywords=Technology&location=United%20States&geoId=103644278&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0&start=', conn, cursor, jobs_to_fetch)
    finally:
        # Release the database handle on every exit path.
        conn.close()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()