-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatabase.py
More file actions
247 lines (206 loc) · 8.14 KB
/
database.py
File metadata and controls
247 lines (206 loc) · 8.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import logging
import os
import sqlite3
from datetime import datetime
# ── config ────────────────────────────────────────────────────────────────────
# Absolute path of the SQLite file: <directory containing this module>/data/rugby.db.
DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "rugby.db")
# strftime pattern for timestamps written to scrape_log.scraped_at.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# ── table definitions ─────────────────────────────────────────────────────────
# teams: one row per team; team_name is UNIQUE, so a repeated INSERT OR IGNORE
# of the same name is skipped.
CREATE_TEAMS_SQL = """
CREATE TABLE IF NOT EXISTS teams (
team_id INTEGER PRIMARY KEY AUTOINCREMENT,
team_name TEXT NOT NULL UNIQUE,
short_name TEXT,
country TEXT,
league TEXT
)
"""
# competitions: one row per competition/season.
# NOTE(review): this table declares no UNIQUE constraint other than its
# autoincrement primary key, so INSERT OR IGNORE has nothing to conflict on —
# confirm whether duplicate rows are intended.
CREATE_COMPETITIONS_SQL = """
CREATE TABLE IF NOT EXISTS competitions (
competition_id INTEGER PRIMARY KEY AUTOINCREMENT,
competition_name TEXT NOT NULL,
competition_type TEXT,
season TEXT
)
"""
# standings: a league-table snapshot for a team in a competition, at most one
# row per (team_id, competition_id, scraped_date). points_diff is declared TEXT
# while the other counters are INTEGER.
CREATE_STANDINGS_SQL = """
CREATE TABLE IF NOT EXISTS standings (
standing_id INTEGER PRIMARY KEY AUTOINCREMENT,
team_id INTEGER NOT NULL,
competition_id INTEGER NOT NULL,
position INTEGER,
played INTEGER,
won INTEGER,
drawn INTEGER,
lost INTEGER,
points_for INTEGER,
points_against INTEGER,
points_diff TEXT,
points INTEGER,
scraped_date TEXT,
FOREIGN KEY (team_id) REFERENCES teams (team_id),
FOREIGN KEY (competition_id) REFERENCES competitions (competition_id),
UNIQUE (team_id, competition_id, scraped_date)
)
"""
# matches: one result per (competition_id, home_team, away_team, match_date).
# Teams are stored as TEXT columns here, not as foreign keys into teams.
CREATE_MATCHES_SQL = """
CREATE TABLE IF NOT EXISTS matches (
match_id INTEGER PRIMARY KEY AUTOINCREMENT,
competition_id INTEGER NOT NULL,
home_team TEXT NOT NULL,
away_team TEXT NOT NULL,
home_score INTEGER,
away_score INTEGER,
match_date TEXT,
FOREIGN KEY (competition_id) REFERENCES competitions (competition_id),
UNIQUE (competition_id, home_team, away_team, match_date)
)
"""
# scrape_log: append-only record of scrape attempts (timestamp, record count, status).
CREATE_SCRAPE_LOG_SQL = """
CREATE TABLE IF NOT EXISTS scrape_log (
log_id INTEGER PRIMARY KEY AUTOINCREMENT,
scraped_at TEXT NOT NULL,
records_found INTEGER,
status TEXT
)
"""
# Every CREATE TABLE statement, listed so that teams and competitions come
# before the tables whose foreign keys reference them.
ALL_TABLES = [
    CREATE_TEAMS_SQL,
    CREATE_COMPETITIONS_SQL,
    CREATE_STANDINGS_SQL,
    CREATE_MATCHES_SQL,
    CREATE_SCRAPE_LOG_SQL,
]
# ── connection ────────────────────────────────────────────────────────────────
def create_connection() -> sqlite3.Connection:
    """Open a connection to the database at DB_PATH with foreign keys enforced.

    The directory that holds the database file is created first if it is
    missing; otherwise ``sqlite3.connect`` fails with "unable to open database
    file" on a fresh checkout.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: if the database cannot be opened.
        OSError: if the data directory cannot be created.
    """
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:  # empty when DB_PATH has no directory component
        os.makedirs(db_dir, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    # SQLite leaves foreign key enforcement off unless enabled per connection.
    conn.execute("PRAGMA foreign_keys = ON")
    return conn
def _execute(sql: str, params: tuple = ()) -> None:
    """Run a single write statement on a fresh connection and commit it.

    Args:
        sql: the statement to run, using ``?`` placeholders.
        params: values bound to the placeholders.

    Any ``sqlite3.Error`` raised while executing or committing is logged and
    swallowed, so writes are best-effort. Errors raised while opening the
    connection still propagate.
    """
    conn = create_connection()
    try:
        # ``with conn`` only commits (or rolls back on error); it does NOT
        # close the connection, hence the explicit close() below.
        with conn:
            conn.execute(sql, params)
    except sqlite3.Error as e:
        logger.error("database error: %s", e)
    finally:
        conn.close()
# ── setup ─────────────────────────────────────────────────────────────────────
def initialise_database() -> None:
    """Create every table listed in ALL_TABLES that does not exist yet.

    Each statement is handed to ``_execute`` on its own, in list order.
    """
    for ddl_statement in ALL_TABLES:
        _execute(ddl_statement)
# ── inserts ───────────────────────────────────────────────────────────────────
def insert_team(team_name: str, short_name, country, league) -> None:
    """Add a team row; do nothing for an empty name or an existing team.

    Args:
        team_name: unique team name; a falsy value makes the call a no-op.
        short_name: value for the short_name column.
        country: value for the country column.
        league: value for the league column.
    """
    if team_name:
        values = (team_name, short_name, country, league)
        # OR IGNORE: the UNIQUE constraint on team_name turns repeats into no-ops.
        _execute(
            "INSERT OR IGNORE INTO teams (team_name, short_name, country, league) VALUES (?, ?, ?, ?)",
            values,
        )
def insert_competition(competition_name: str, competition_type: str, season) -> None:
    """Add a competition row unless an identical one is already stored.

    The competitions table has no UNIQUE constraint, so a plain
    ``INSERT OR IGNORE`` never skips anything and every call would add a
    duplicate. The statement therefore only inserts when no row with the same
    name, type and season exists.

    Args:
        competition_name: competition name; a falsy value makes the call a no-op.
        competition_type: value for the competition_type column (may be None).
        season: value for the season column (may be None).
    """
    if not competition_name:
        return
    _execute(
        # ``IS`` is SQLite's null-safe equality, so rows with a NULL type or
        # season are matched as well. OR IGNORE is kept so the statement also
        # behaves if a UNIQUE constraint is added to the table later.
        """
        INSERT OR IGNORE INTO competitions (competition_name, competition_type, season)
        SELECT ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM competitions
            WHERE competition_name = ? AND competition_type IS ? AND season IS ?
        )
        """,
        (
            competition_name,
            competition_type,
            season,
            competition_name,
            competition_type,
            season,
        ),
    )
def insert_standing(
    team_id: int,
    competition_id: int,
    position: int,
    played: int,
    won: int,
    drawn: int,
    lost: int,
    points_for: int,
    points_against: int,
    points_diff: str,
    points: int,
    scraped_date: str,
) -> None:
    """Store one league-table row for a team in a competition.

    A row for the same team, competition and scraped_date is left untouched
    (INSERT OR IGNORE against the table's UNIQUE constraint). The arguments
    map one-to-one onto the standings columns of the same name.
    """
    row_values = (
        team_id, competition_id, position, played, won, drawn, lost,
        points_for, points_against, points_diff, points, scraped_date,
    )
    statement = """
INSERT OR IGNORE INTO standings
(team_id, competition_id, position, played, won, drawn, lost,
points_for, points_against, points_diff, points, scraped_date)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
    _execute(statement, row_values)
def insert_match(
    competition_id: int,
    home_team: str,
    away_team: str,
    home_score: int,
    away_score: int,
    match_date: str,
) -> None:
    """Store one match result.

    A match with the same competition, home team, away team and date is left
    untouched (INSERT OR IGNORE against the table's UNIQUE constraint).
    """
    result_row = (
        competition_id,
        home_team,
        away_team,
        home_score,
        away_score,
        match_date,
    )
    statement = """
INSERT OR IGNORE INTO matches
(competition_id, home_team, away_team, home_score, away_score, match_date)
VALUES (?, ?, ?, ?, ?, ?)
"""
    _execute(statement, result_row)
def log_scrape(records_found: int, status: str) -> None:
    """Append a scrape_log row stamped with the current local time.

    Args:
        records_found: number of records the scrape produced.
        status: outcome label stored in the status column.
    """
    stamp = datetime.now().strftime(TIMESTAMP_FORMAT)
    entry = (stamp, records_found, status)
    _execute(
        "INSERT INTO scrape_log (scraped_at, records_found, status) VALUES (?, ?, ?)",
        entry,
    )
# ── queries ───────────────────────────────────────────────────────────────────
def get_team_id(team_name: str) -> int | None:
    """Look up the primary key of a team by its exact name.

    Args:
        team_name: value compared against teams.team_name.

    Returns:
        The team_id, or None when no team has that name.

    Raises:
        sqlite3.Error: if the query fails (not caught here).
    """
    conn = create_connection()
    try:
        row = conn.execute(
            "SELECT team_id FROM teams WHERE team_name = ?", (team_name,)
        ).fetchone()
    finally:
        # The sqlite3 connection context manager does not close the
        # connection, so close it explicitly to avoid leaking a handle.
        conn.close()
    return row[0] if row else None
def get_known_match_ids(competition_id: int) -> set:
    """Return the identifying keys of every stored match in a competition.

    Args:
        competition_id: competition whose matches are listed.

    Returns:
        A set of ``(home_team, away_team, match_date)`` tuples. An empty set
        is returned when there are no matches or when the query fails (the
        error is logged).
    """
    conn = create_connection()
    try:
        rows = conn.execute(
            "SELECT home_team, away_team, match_date FROM matches WHERE competition_id = ?",
            (competition_id,),
        ).fetchall()
        return {(home, away, played_on) for home, away, played_on in rows}
    except sqlite3.Error as e:
        logger.error("error fetching known matches: %s", e)
        return set()
    finally:
        # The sqlite3 connection context manager does not close the
        # connection, so close it explicitly to avoid leaking a handle.
        conn.close()
def get_match_score(
    competition_id: int, home: str, away: str, match_date: str
) -> tuple:
    """Fetch the stored score of one match.

    Args:
        competition_id: competition the match belongs to.
        home: home team name as stored in matches.home_team.
        away: away team name as stored in matches.away_team.
        match_date: date string as stored in matches.match_date.

    Returns:
        ``(home_score, away_score)`` for the matching row, or ``(0, 0)`` when
        no such match is stored. Note that a missing match is therefore
        indistinguishable from a stored 0-0 result.

    Raises:
        sqlite3.Error: if the query fails (not caught here).
    """
    conn = create_connection()
    try:
        row = conn.execute(
            """
SELECT home_score, away_score FROM matches
WHERE competition_id = ? AND home_team = ? AND away_team = ? AND match_date = ?
""",
            (competition_id, home, away, match_date),
        ).fetchone()
    finally:
        # The sqlite3 connection context manager does not close the
        # connection, so close it explicitly to avoid leaking a handle.
        conn.close()
    return row if row else (0, 0)
# Script entry point: running this module directly creates the tables and
# prints a confirmation message.
if __name__ == "__main__":
    initialise_database()
    print("database ready!")