From 0319aef2069039adf404ed9759bf5805c9c54a35 Mon Sep 17 00:00:00 2001 From: Pj Metz Date: Thu, 14 May 2026 12:38:33 -0700 Subject: [PATCH 1/2] =?UTF-8?q?Remove=20keyword=20filter=20from=20YouTube?= =?UTF-8?q?=20digest=20=E2=80=94=20pull=20all=20recent=20videos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The YouTube digest now fetches any video posted in the past 7 days from the configured channel, rather than filtering by topic keywords. This avoids empty digests caused by keyword mismatches. Changes: - Replace search_by_keywords/get_top_videos_by_keywords with search_recent/get_top_recent_videos in youtube_api.py (no q param) - Remove keywords config and _DEFAULT_KEYWORDS from youtube_watcher.py - Update embed description to drop the Topics line - Remove keywords from youtube section of config.yaml - Update all tests to use the new method names Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bot/cogs/youtube_watcher.py | 36 ++++++++--------------------- config/config.yaml | 11 +-------- tests/test_cog_flows.py | 3 +-- tests/test_youtube_api.py | 19 +++++++-------- utils/youtube_api.py | 46 +++++++++++-------------------------- 5 files changed, 33 insertions(+), 82 deletions(-) diff --git a/bot/cogs/youtube_watcher.py b/bot/cogs/youtube_watcher.py index 5e092ac..d7e27bc 100644 --- a/bot/cogs/youtube_watcher.py +++ b/bot/cogs/youtube_watcher.py @@ -7,9 +7,8 @@ 2. On each run the cog checks whether today (UTC) is the configured ``digest_day`` (default: Thursday). 3. If it is Thursday and a digest hasn't already been sent today, the cog: - a. Searches the GitHub YouTube channel for videos matching any of the - configured ``keywords`` published in the past 7 days - (up to ``search_pool`` candidates). + a. Fetches all videos from the GitHub YouTube channel published in the + past 7 days (up to ``search_pool`` candidates). b. Fetches view counts for all candidates in a single API call. c. Sorts by view count and picks the top ``digest_count`` videos. d. Posts a single rich embed digest to the configured Discord channel. @@ -25,7 +24,6 @@ channel_id – YouTube channel ID to watch discord_channel_id – Discord channel ID to post the digest in digest_day – Day of week for the digest (default: "thursday") - keywords – List of topic keywords to filter videos (OR logic) digest_count – Number of videos in the digest (default: 3) search_pool – Candidate pool size before view-count ranking (default: 20) """ @@ -34,7 +32,6 @@ import logging import os from datetime import datetime, timedelta, timezone -from typing import List import discord from discord import app_commands @@ -51,14 +48,6 @@ "friday": 4, "saturday": 5, "sunday": 6, } -_DEFAULT_KEYWORDS = [ - "GitHub Copilot", - "GitHub Copilot CLI", - "Security", - "Developer Skills", - "Company News", -] - class YouTubeWatcher(commands.Cog): """Background task that posts a weekly YouTube digest on Thursdays.""" @@ -69,7 +58,6 @@ def __init__(self, bot: commands.Bot) -> None: self.yt_channel_id: str = cfg["channel_id"] self.discord_channel_id: int = int(cfg["discord_channel_id"]) - self.keywords: List[str] = cfg.get("keywords", _DEFAULT_KEYWORDS) self.digest_count: int = int(cfg.get("digest_count", 3)) self.search_pool: int = int(cfg.get("search_pool", 20)) digest_day_str: str = str(cfg.get("digest_day", "thursday")).strip().lower() @@ -111,17 +99,15 @@ async def weekly_digest(self) -> None: return logger.info( - "Running weekly YouTube digest (keywords=%s, date=%s)", - self.keywords, + "Running weekly YouTube digest (date=%s)", today_str, ) today_midnight = now.replace(hour=0, minute=0, second=0, microsecond=0) since = today_midnight - timedelta(days=7) videos = await asyncio.to_thread( - self.yt_client.get_top_videos_by_keywords, + self.yt_client.get_top_recent_videos, channel_id=self.yt_channel_id, - keywords=self.keywords, published_after=since, top_n=self.digest_count, search_pool=self.search_pool, @@ -151,15 +137,14 @@ async def weekly_digest(self) -> None: if not videos: logger.warning( - "YouTube digest query returned no videos for keywords %s. " + "YouTube digest returned no videos for the past 7 days. " "Because an empty result may also indicate a YouTube API error, " "skipping the 'no videos' post and not marking the digest as sent " "so it can be retried later.", - self.keywords, ) return - embed = _build_digest_embed(videos, self.keywords, since, now) + embed = _build_digest_embed(videos, since, now) await channel.send(embed=embed) logger.info( "Posted YouTube weekly digest: %d video(s).", len(videos) @@ -194,9 +179,8 @@ async def youtubedigest(self, interaction: discord.Interaction) -> None: since = today_midnight - timedelta(days=7) videos = await asyncio.to_thread( - self.yt_client.get_top_videos_by_keywords, + self.yt_client.get_top_recent_videos, channel_id=self.yt_channel_id, - keywords=self.keywords, published_after=since, top_n=self.digest_count, search_pool=self.search_pool, @@ -230,7 +214,7 @@ async def youtubedigest(self, interaction: discord.Interaction) -> None: ) return - embed = _build_digest_embed(videos, self.keywords, since, now) + embed = _build_digest_embed(videos, since, now) await channel.send(embed=embed) logger.info("Manual YouTube digest posted by %s: %d video(s).", interaction.user, len(videos)) await interaction.followup.send( @@ -272,18 +256,16 @@ def _truncate(text: str, max_chars: int) -> str: def _build_digest_embed( videos: list, - keywords: List[str], since: datetime, now: datetime, ) -> discord.Embed: """Construct a Discord :class:`discord.Embed` for the weekly video digest.""" date_range = f"{since.strftime('%b %d')} – {now.strftime('%b %d, %Y')}" - topics_str = ", ".join(keywords) embed = discord.Embed( title="📺 GitHub — Weekly Video Digest", description=( f"Top GitHub YouTube videos from the past week ({date_range}), " - f"ranked by views.\n**Topics:** {topics_str}" + f"ranked by views." ), color=discord.Color.red(), ) diff --git a/config/config.yaml b/config/config.yaml index a66f684..55ed8a8 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -24,19 +24,10 @@ youtube: # Day of the week to post the digest (case-insensitive) digest_day: "thursday" - # Topics to filter videos by (OR logic – a video matches if any keyword appears - # in its title or description) - keywords: - - "GitHub Copilot" - - "GitHub Copilot CLI" - - "Security" - - "Developer Skills" - - "Company News" - # Number of top videos to include in the digest (ranked by view count) digest_count: 3 - # Candidate pool size: how many recent keyword-matching videos to fetch before + # Candidate pool size: how many recent videos to fetch before # ranking by view count and picking the top digest_count search_pool: 20 diff --git a/tests/test_cog_flows.py b/tests/test_cog_flows.py index 7866832..825b98c 100644 --- a/tests/test_cog_flows.py +++ b/tests/test_cog_flows.py @@ -27,7 +27,7 @@ class _FakeYouTubeClient: def __init__(self, api_key: str): self.api_key = api_key - def get_top_videos_by_keywords(self, **kwargs): + def get_top_recent_videos(self, **kwargs): return [ { "id": "vid-1", @@ -74,7 +74,6 @@ async def test_youtube_weekly_digest_fetches_channel_and_posts(self) -> None: "youtube": { "channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsA", "discord_channel_id": 123456789012345678, - "keywords": ["GitHub Copilot"], "digest_count": 1, "search_pool": 10, "digest_day": _today_name_utc(), diff --git a/tests/test_youtube_api.py b/tests/test_youtube_api.py index 397831c..56d397b 100644 --- a/tests/test_youtube_api.py +++ b/tests/test_youtube_api.py @@ -48,17 +48,16 @@ def videos(self): class YouTubeApiTests(unittest.TestCase): - def test_search_by_keywords_requires_timezone_aware_datetime(self) -> None: + def test_search_recent_requires_timezone_aware_datetime(self) -> None: client = YouTubeClient.__new__(YouTubeClient) client._service = _FakeService() with self.assertRaises(ValueError): - client.search_by_keywords( + client.search_recent( channel_id="channel-id", - keywords=["Copilot"], published_after=datetime.now(), ) - def test_search_by_keywords_joins_list_keywords_and_maps_response(self) -> None: + def test_search_recent_maps_response_without_query_param(self) -> None: service = _FakeService( search_payload={ "items": [ @@ -77,14 +76,13 @@ def test_search_by_keywords_joins_list_keywords_and_maps_response(self) -> None: client = YouTubeClient.__new__(YouTubeClient) client._service = service - videos = client.search_by_keywords( + videos = client.search_recent( channel_id="channel-id", - keywords=["Copilot", "Security"], published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), max_results=15, ) - self.assertEqual(service.search_capture["q"], "Copilot|Security") + self.assertNotIn("q", service.search_capture) self.assertEqual(service.search_capture["channelId"], "channel-id") self.assertEqual(service.search_capture["maxResults"], 15) self.assertEqual(videos[0]["id"], "abc123") @@ -108,11 +106,11 @@ def test_get_video_statistics_parses_view_counts(self) -> None: self.assertEqual(stats, {"vid1": 12, "vid2": 3000}) self.assertEqual(service.videos_capture["id"], "vid1,vid2") - def test_get_top_videos_by_keywords_sorts_descending_by_view_count(self) -> None: + def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None: client = YouTubeClient.__new__(YouTubeClient) with patch.object( client, - "search_by_keywords", + "search_recent", return_value=[ {"id": "a", "title": "A", "view_count": 0}, {"id": "b", "title": "B", "view_count": 0}, @@ -121,9 +119,8 @@ def test_get_top_videos_by_keywords_sorts_descending_by_view_count(self) -> None ), patch.object( client, "get_video_statistics", return_value={"a": 10, "b": 300, "c": 50} ): - top = client.get_top_videos_by_keywords( + top = client.get_top_recent_videos( channel_id="channel-id", - keywords=["Copilot"], published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), top_n=2, search_pool=20, diff --git a/utils/youtube_api.py b/utils/youtube_api.py index 0df6938..fd61080 100644 --- a/utils/youtube_api.py +++ b/utils/youtube_api.py @@ -18,13 +18,10 @@ client = YouTubeClient(api_key="YOUR_KEY") - # Weekly digest: top 3 videos from the past 7 days matching any keyword - since = datetime.now(tz=timezone.utc) - timedelta(days=7) - keywords = ["GitHub Copilot", "GitHub Copilot CLI", "Security", - "Developer Skills", "Company News"] - videos = client.get_top_videos_by_keywords( + # Weekly digest: top 3 videos from the past 7 days (no keyword filter) + since = datetime.now(tz=timezone.utc) - timedelta(days=7) + videos = client.get_top_recent_videos( channel_id="UC7c3Kb6jYCRj4JOHHZTxKsA", - keywords=keywords, published_after=since, top_n=3, ) @@ -34,7 +31,7 @@ import logging from datetime import datetime -from typing import Dict, List, Union +from typing import Dict, List from googleapiclient.discovery import build from googleapiclient.errors import HttpError @@ -63,18 +60,13 @@ def __init__(self, api_key: str) -> None: # Public methods # ------------------------------------------------------------------ - def search_by_keywords( + def search_recent( self, channel_id: str, - keywords: Union[str, List[str]], published_after: datetime, max_results: int = 20, ) -> List[dict]: - """Search *channel_id* for videos matching any of *keywords* published after *published_after*. - - *keywords* may be a single string or a list of strings. When a list is - supplied the YouTube ``q`` parameter is constructed as - ``"term1|term2|term3"`` so the API returns results matching **any** term. + """Search *channel_id* for all videos published after *published_after*. Each returned dict contains: @@ -93,13 +85,6 @@ def search_by_keywords( "published_after must be a timezone-aware datetime (e.g. use timezone.utc)" ) - # Build the query string. The YouTube Data API supports OR via "|". - if isinstance(keywords, list): - query = "|".join(keywords) - else: - query = keywords - - # RFC 3339 format required by the YouTube Data API. published_after_str = published_after.strftime("%Y-%m-%dT%H:%M:%SZ") try: response = ( @@ -107,7 +92,6 @@ def search_by_keywords( .list( part="snippet", channelId=channel_id, - q=query, order="date", type="video", publishedAfter=published_after_str, @@ -117,9 +101,8 @@ def search_by_keywords( ) except HttpError as exc: logger.error( - "YouTube API search error (channel=%s, keywords=%s): %s", + "YouTube API search error (channel=%s): %s", channel_id, - keywords, exc, ) return [] @@ -175,26 +158,25 @@ def get_video_statistics(self, video_ids: List[str]) -> Dict[str, int]: stats[vid_id] = int(raw) return stats - def get_top_videos_by_keywords( + def get_top_recent_videos( self, channel_id: str, - keywords: Union[str, List[str]], published_after: datetime, top_n: int = 3, search_pool: int = 20, ) -> List[dict]: - """Return the top *top_n* videos matching *keywords* ranked by view count. + """Return the top *top_n* recent videos from *channel_id* ranked by view count. - 1. Searches *channel_id* for videos matching any term in *keywords* - published in the past week (up to *search_pool* candidates). + 1. Fetches up to *search_pool* videos from *channel_id* published after + *published_after* (no keyword filter). 2. Fetches view counts for all candidates in a single batch call. 3. Sorts by view count descending and returns the top *top_n*. - Each returned dict contains the same fields as :meth:`search_by_keywords` + Each returned dict contains the same fields as :meth:`search_recent` plus a populated ``view_count`` integer. """ - videos = self.search_by_keywords( - channel_id, keywords, published_after, max_results=search_pool + videos = self.search_recent( + channel_id, published_after, max_results=search_pool ) if not videos: return [] From 7d3d9271c8efe168ad54a5896e0565824b1c83a2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 14 May 2026 21:00:27 +0000 Subject: [PATCH 2/2] Fix review feedback for YouTube digest test and message --- bot/cogs/youtube_watcher.py | 2 +- tests/test_digest_helpers.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/bot/cogs/youtube_watcher.py b/bot/cogs/youtube_watcher.py index d7e27bc..4bfb742 100644 --- a/bot/cogs/youtube_watcher.py +++ b/bot/cogs/youtube_watcher.py @@ -209,7 +209,7 @@ async def youtubedigest(self, interaction: discord.Interaction) -> None: if not videos: await interaction.followup.send( - "⚠️ No matching YouTube videos found for the past 7 days.", + "⚠️ No recent YouTube videos found for the past 7 days.", ephemeral=True, ) return diff --git a/tests/test_digest_helpers.py b/tests/test_digest_helpers.py index 761beda..a158b49 100644 --- a/tests/test_digest_helpers.py +++ b/tests/test_digest_helpers.py @@ -29,7 +29,6 @@ def test_build_youtube_embed_contains_expected_fields(self) -> None: "view_count": 1200, } ], - keywords=["GitHub Copilot", "Security"], since=now, now=now, )