-
Notifications
You must be signed in to change notification settings - Fork 0
PR #574: Wire mlb_form_layer → live_batting_logs/live_pitching_logs (DB-first, API fallback) #444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -41,6 +41,31 @@ | |||||||||||||
| logger = logging.getLogger("propiq.form") | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| # ── Postgres helpers (reads from live_batting_logs / live_pitching_logs) ────── | ||||||||||||||
| def _get_pg(): | ||||||||||||||
| """Return a psycopg2 connection or None.""" | ||||||||||||||
| import os | ||||||||||||||
| try: | ||||||||||||||
| import psycopg2 | ||||||||||||||
| db_url = os.environ.get("DATABASE_URL", "") | ||||||||||||||
| if not db_url: | ||||||||||||||
| return None | ||||||||||||||
| return psycopg2.connect(db_url) | ||||||||||||||
| except Exception: | ||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
| # Mapping: API stat key → (table_col_expr_for_batting, table_col_expr_for_pitching) | ||||||||||||||
| # None means not available in that table. | ||||||||||||||
| _DB_STAT_MAP = { | ||||||||||||||
| "hits": ("(h_1b + h_2b + h_3b + home_runs)", None), | ||||||||||||||
| "rbi": ("b_rbi", None), | ||||||||||||||
| "runs": ("b_runs", None), | ||||||||||||||
| "totalBases": ("(h_1b + 2*h_2b + 3*h_3b + 4*home_runs)", None), | ||||||||||||||
| "strikeOuts": ("b_k", "strikeouts"), | ||||||||||||||
| "earnedRuns": (None, "earnedruns"), | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+59
to
+66
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| # ── Trend significance gates (from sequencebaseball/cogs/trends.py) ─────────── | ||||||||||||||
| try: | ||||||||||||||
| from fix_trend_significance import gate_form_adjustment, is_significant_trend | ||||||||||||||
|
|
@@ -165,6 +190,143 @@ | |||||||||||||
| # API fetchers | ||||||||||||||
| # ------------------------------------------------------------------ | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| # ------------------------------------------------------------------ | ||||||||||||||
| # DB-primary fetchers — read from nightly game_logs_refresh tables | ||||||||||||||
| # ------------------------------------------------------------------ | ||||||||||||||
|
|
||||||||||||||
| @staticmethod | ||||||||||||||
| def _fetch_game_log_from_db( | ||||||||||||||
| player_id: int, group: str, window: int | ||||||||||||||
| ) -> list[dict] | None: | ||||||||||||||
| """ | ||||||||||||||
| Query live_batting_logs / live_pitching_logs for recent game rows. | ||||||||||||||
| Returns list of synthetic "split" dicts (same shape _compute_form expects) | ||||||||||||||
| or None if the table is empty / player not found. | ||||||||||||||
| """ | ||||||||||||||
| conn = _get_pg() | ||||||||||||||
| if conn is None: | ||||||||||||||
| return None | ||||||||||||||
| table = "live_batting_logs" if group == "hitting" else "live_pitching_logs" | ||||||||||||||
| try: | ||||||||||||||
| with conn, conn.cursor() as cur: | ||||||||||||||
| cur.execute( | ||||||||||||||
|
Check failure on line 213 in mlb_form_layer.py
|
||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||
| f""" | ||||||||||||||
| SELECT game_date, | ||||||||||||||
| h_1b, h_2b, h_3b, home_runs, | ||||||||||||||
| b_rbi, b_runs, b_k, | ||||||||||||||
| COALESCE(strikeouts, 0) AS strikeouts, | ||||||||||||||
| COALESCE(earnedruns, 0) AS earnedruns, | ||||||||||||||
| COALESCE(outs, 0) AS outs_pitched | ||||||||||||||
|
Comment on lines
+218
to
+220
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P1: The batting DB query references non-existent columns ( Prompt for AI agents
Suggested change
|
||||||||||||||
| FROM {table} | ||||||||||||||
| WHERE mlbam_id = %s | ||||||||||||||
| ORDER BY game_date DESC | ||||||||||||||
| LIMIT %s | ||||||||||||||
| """, | ||||||||||||||
| (player_id, window), | ||||||||||||||
| ) if table == "live_batting_logs" else cur.execute( | ||||||||||||||
| f""" | ||||||||||||||
| SELECT game_date, | ||||||||||||||
| 0 AS h_1b, 0 AS h_2b, 0 AS h_3b, 0 AS home_runs, | ||||||||||||||
| 0 AS b_rbi, 0 AS b_runs, 0 AS b_k, | ||||||||||||||
| COALESCE(strikeouts, 0) AS strikeouts, | ||||||||||||||
| COALESCE(earnedruns, 0) AS earnedruns, | ||||||||||||||
| COALESCE(outs, 0) AS outs_pitched | ||||||||||||||
| FROM {table} | ||||||||||||||
| WHERE mlbam_id = %s | ||||||||||||||
| ORDER BY game_date DESC | ||||||||||||||
| LIMIT %s | ||||||||||||||
| """, | ||||||||||||||
| (player_id, window), | ||||||||||||||
| ) | ||||||||||||||
|
Comment on lines
+213
to
+241
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||
| rows = cur.fetchall() | ||||||||||||||
| except Exception as exc: | ||||||||||||||
| logger.debug("[Form][DB] game log query failed: %s", exc) | ||||||||||||||
| return None | ||||||||||||||
| finally: | ||||||||||||||
| conn.close() | ||||||||||||||
|
|
||||||||||||||
| if not rows: | ||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
| def _row_to_stat(r) -> dict: | ||||||||||||||
| hits = (r[1] or 0) + (r[2] or 0) + (r[3] or 0) + (r[4] or 0) | ||||||||||||||
| tb = (r[1] or 0) + 2*(r[2] or 0) + 3*(r[3] or 0) + 4*(r[4] or 0) | ||||||||||||||
| return { | ||||||||||||||
| "hits": hits, | ||||||||||||||
| "rbi": r[5] or 0, | ||||||||||||||
| "runs": r[6] or 0, | ||||||||||||||
| "totalBases": tb, | ||||||||||||||
| "strikeOuts": r[7] if group == "hitting" else r[8], | ||||||||||||||
| "earnedRuns": r[9] or 0, | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+252
to
+262
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||
|
|
||||||||||||||
| # Return synthetic split dicts | ||||||||||||||
| return [{"stat": _row_to_stat(r)} for r in rows] | ||||||||||||||
|
|
||||||||||||||
| @staticmethod | ||||||||||||||
| def _fetch_season_per_game_from_db( | ||||||||||||||
| player_id: int, group: str | ||||||||||||||
| ) -> dict[str, float] | None: | ||||||||||||||
| """ | ||||||||||||||
| Compute season-average-per-game from DB for baseline comparison. | ||||||||||||||
| Returns {api_stat_key: per_game_avg} or None if insufficient data. | ||||||||||||||
| """ | ||||||||||||||
| conn = _get_pg() | ||||||||||||||
| if conn is None: | ||||||||||||||
| return None | ||||||||||||||
| table = "live_batting_logs" if group == "hitting" else "live_pitching_logs" | ||||||||||||||
| try: | ||||||||||||||
| with conn, conn.cursor() as cur: | ||||||||||||||
| if table == "live_batting_logs": | ||||||||||||||
| cur.execute( | ||||||||||||||
| """ | ||||||||||||||
| SELECT COUNT(*), | ||||||||||||||
| SUM(h_1b+h_2b+h_3b+home_runs), | ||||||||||||||
| SUM(b_rbi), SUM(b_runs), | ||||||||||||||
| SUM(h_1b + 2*h_2b + 3*h_3b + 4*home_runs), | ||||||||||||||
| SUM(b_k) | ||||||||||||||
| FROM live_batting_logs | ||||||||||||||
| WHERE mlbam_id = %s AND game_date >= '2026-03-01' | ||||||||||||||
| """, | ||||||||||||||
| (player_id,), | ||||||||||||||
| ) | ||||||||||||||
| else: | ||||||||||||||
| cur.execute( | ||||||||||||||
| """ | ||||||||||||||
| SELECT COUNT(*), | ||||||||||||||
| 0, 0, 0, 0, | ||||||||||||||
| SUM(strikeouts), SUM(earnedruns) | ||||||||||||||
| FROM live_pitching_logs | ||||||||||||||
| WHERE mlbam_id = %s AND game_date >= '2026-03-01' | ||||||||||||||
|
Comment on lines
+290
to
+301
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a logic discrepancy between the DB fetcher and the API fetcher for the season baseline. The API fetcher (
Comment on lines
+290
to
+301
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hardcoded season date The season filter is hardcoded to 🐛 Proposed fix: derive season start dynamicallyAt the start of the method, compute the season start date: from datetime import datetime
# Inside _fetch_season_per_game_from_db, before the query:
current_year = datetime.now().year
season_start = f"{current_year}-03-01"Then use cur.execute(
"""
SELECT COUNT(*),
SUM(h_1b+h_2b+h_3b+home_runs),
SUM(b_rbi), SUM(b_runs),
SUM(h_1b + 2*h_2b + 3*h_3b + 4*home_runs),
SUM(b_k)
FROM live_batting_logs
- WHERE mlbam_id = %s AND game_date >= '2026-03-01'
+ WHERE mlbam_id = %s AND game_date >= %s
""",
- (player_id,),
+ (player_id, season_start),
)Apply the same change to the pitching query. 🤖 Prompt for AI Agents |
||||||||||||||
| """, | ||||||||||||||
| (player_id,), | ||||||||||||||
| ) | ||||||||||||||
| row = cur.fetchone() | ||||||||||||||
| except Exception as exc: | ||||||||||||||
| logger.debug("[Form][DB] season avg query failed: %s", exc) | ||||||||||||||
| return None | ||||||||||||||
| finally: | ||||||||||||||
| conn.close() | ||||||||||||||
|
|
||||||||||||||
| if not row or not row[0] or row[0] < 3: | ||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
| gp = row[0] | ||||||||||||||
| if group == "hitting": | ||||||||||||||
| return { | ||||||||||||||
| "hits": (row[1] or 0) / gp, | ||||||||||||||
| "rbi": (row[2] or 0) / gp, | ||||||||||||||
| "runs": (row[3] or 0) / gp, | ||||||||||||||
| "totalBases": (row[4] or 0) / gp, | ||||||||||||||
| "strikeOuts": (row[5] or 0) / gp, | ||||||||||||||
| } | ||||||||||||||
| else: | ||||||||||||||
| return { | ||||||||||||||
| "strikeOuts": (row[5] or 0) / gp, | ||||||||||||||
| "earnedRuns": (row[6] or 0) / gp, | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| @staticmethod | ||||||||||||||
| def _fetch_game_log( | ||||||||||||||
| player_id: int, group: str, window: int | ||||||||||||||
|
|
@@ -257,8 +419,17 @@ | |||||||||||||
|
|
||||||||||||||
| for group in groups_needed: | ||||||||||||||
| window = _PITCHER_WINDOW if group == "pitching" else _HITTER_WINDOW | ||||||||||||||
| recent_splits = self._fetch_game_log(player_id, group, window) | ||||||||||||||
| season_pg = self._fetch_season_per_game(player_id, group) | ||||||||||||||
|
|
||||||||||||||
| # DB-first: read from nightly game_logs_refresh tables (fast, no API quota) | ||||||||||||||
| recent_splits = self._fetch_game_log_from_db(player_id, group, window) | ||||||||||||||
| season_pg = self._fetch_season_per_game_from_db(player_id, group) | ||||||||||||||
|
|
||||||||||||||
| # API fallback if DB tables are empty / player not yet seeded | ||||||||||||||
| if recent_splits is None: | ||||||||||||||
| logger.debug("[Form] DB miss for pid=%d group=%s — falling back to live API", player_id, group) | ||||||||||||||
| recent_splits = self._fetch_game_log(player_id, group, window) | ||||||||||||||
| if season_pg is None: | ||||||||||||||
| season_pg = self._fetch_season_per_game(player_id, group) | ||||||||||||||
|
|
||||||||||||||
| if not recent_splits or not season_pg: | ||||||||||||||
| continue | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
_get_pghelper creates a new database connection every time it is called. Sinceprefetch_form_dataiterates over a set of players and calls multiple DB-fetching methods per player, this implementation will open and close hundreds of connections in a single dispatch run. This is highly inefficient and risks exhausting the database connection pool or hitting server-side connection limits. Consider using a connection pool (e.g.,psycopg2.pool.SimpleConnectionPool) or opening a single connection at the start of the pre-fetch loop and passing it down.