From 911381ad1bc47822fe644608703d69d9f4adc225 Mon Sep 17 00:00:00 2001
From: suheon
Date: Fri, 24 Apr 2026 10:28:52 +0900
Subject: [PATCH] =?UTF-8?q?feat(trend):=20=ED=8A=B8=EB=A0=8C=EB=93=9C=20?=
 =?UTF-8?q?=EB=9E=AD=ED=82=B9=20=EA=B5=AC=ED=98=84=20=E2=80=94=20Top=205?=
 =?UTF-8?q?=20=EC=A1=B0=ED=9A=8C=20=EC=BD=98=ED=85=90=EC=B8=A0=20+=20Top?=
 =?UTF-8?q?=2010=20=ED=83=9C=EA=B7=B8=20(DP-383)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Sonnet 4.6
---
 app/repositories/content_repository.py |  16 ++++
 app/services/trend/ranking.py          | 112 +++++++++++++++++++++++
 tests/test_content_repository.py       |  37 ++++++++
 tests/test_trend_ranking.py            | 119 +++++++++++++++++++++++++
 4 files changed, 284 insertions(+)
 create mode 100644 app/services/trend/ranking.py
 create mode 100644 tests/test_trend_ranking.py

diff --git a/app/repositories/content_repository.py b/app/repositories/content_repository.py
index ae1706a..4df122e 100644
--- a/app/repositories/content_repository.py
+++ b/app/repositories/content_repository.py
@@ -232,6 +232,22 @@ def find_by_published_range(self, start: datetime, end: datetime) -> list[dict]:
             )
             return [dict(row) for row in result.mappings().fetchall()]
 
+    def find_by_ids(self, content_ids: list[str]) -> list[dict]:
+        """Fetch details of the available contents matching the given ids."""
+        if not content_ids:
+            return []
+        with self._engine.begin() as conn:
+            result = conn.execute(
+                text(
+                    "SELECT id, title, translated_title, category, tags,"
+                    " source_id, published_at"
+                    " FROM contents"
+                    " WHERE id = ANY(:ids) AND is_available = true"
+                ),
+                {"ids": list(content_ids)},
+            )
+            return [dict(row) for row in result.mappings().fetchall()]
+
     def find_view_counts_by_period(
         self, start: datetime, end: datetime
     ) -> dict[str, int]:
diff --git a/app/services/trend/ranking.py b/app/services/trend/ranking.py
new file mode 100644
index 0000000..8b03b99
--- /dev/null
+++ b/app/services/trend/ranking.py
@@ -0,0 +1,112 @@
+"""Trend ranking: pick the Top 5 viewed contents and the Top 10 tags (DP-383)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from app.services.trend.frequency import TagFrequency
+
+
+@dataclass
+class RankedTag:
+    """A tag's frequency figures together with its computed ranking score."""
+
+    keyword: str
+    cur_count: int
+    prev_count: int
+    delta: int
+    growth_rate: float | None
+    state: str
+    tag_count: int
+    score: float
+
+
+def _clip(value: float, lo: float, hi: float) -> float:
+    """Clamp ``value`` into the closed interval ``[lo, hi]``."""
+    return max(lo, min(hi, value))
+
+
+class TrendRanker:
+    """Select top contents by period view count and top tags by composite score."""
+
+    def __init__(self, top_contents: int = 5, top_tags: int = 10) -> None:
+        self._top_contents = top_contents
+        self._top_tags = top_tags
+
+    def rank_contents(
+        self,
+        cur_view_counts: dict[str, int],
+        content_details: list[dict],
+    ) -> list[dict]:
+        """Return the Top N contents ordered by period view count, descending.
+
+        Args:
+            cur_view_counts: Mapping of content id to its view count.
+            content_details: Content rows, each carrying an ``"id"`` key
+                (e.g. the result of ``ContentRepository.find_by_ids()``).
+
+        Returns:
+            Copies of the matching detail dicts with a ``view_count`` field
+            added. Ids that have no detail row are ignored.
+        """
+        if not cur_view_counts or not content_details:
+            return []
+        details_map = {c["id"]: c for c in content_details}
+        # Drop ids without details *before* truncating: slicing first would
+        # let a missing/unavailable content consume a Top N slot and return
+        # fewer items than are actually rankable.
+        top_ids = sorted(
+            (cid for cid in cur_view_counts if cid in details_map),
+            key=cur_view_counts.__getitem__,
+            reverse=True,
+        )[: self._top_contents]
+        return [
+            {**details_map[cid], "view_count": cur_view_counts[cid]}
+            for cid in top_ids
+        ]
+
+    def rank_tags(
+        self,
+        tag_frequencies: list[TagFrequency],
+        summary_meta: dict[str, dict],
+    ) -> list[RankedTag]:
+        """Compute a composite score per tag and return the Top N by score.
+
+        score = 0.5 × delta
+              + 0.5 × clip(growth_rate, -3.0, 3.0)
+              + (2.0 if category_match else 0.0)   # α=2
+              + (0.5 if state="new" else 0.0)       # new_bonus
+
+        A ``growth_rate`` of ``None`` (state="new") is treated as 0.0. A tag
+        is a category match when its keyword equals any ``"category"`` value
+        found in ``summary_meta``; such tags also get ``tag_count`` raised by 2.
+        """
+        categories = {
+            meta["category"] for meta in summary_meta.values() if meta.get("category")
+        }
+        ranked: list[RankedTag] = []
+        for tf in tag_frequencies:
+            category_match = tf.keyword in categories
+            tag_count = tf.cur_count + (2 if category_match else 0)
+            gr = tf.growth_rate if tf.growth_rate is not None else 0.0
+            score = (
+                0.5 * tf.delta
+                + 0.5 * _clip(gr, -3.0, 3.0)
+                + (2.0 if category_match else 0.0)
+                + (0.5 if tf.state == "new" else 0.0)
+            )
+            ranked.append(
+                RankedTag(
+                    keyword=tf.keyword,
+                    cur_count=tf.cur_count,
+                    prev_count=tf.prev_count,
+                    delta=tf.delta,
+                    growth_rate=tf.growth_rate,
+                    state=tf.state,
+                    tag_count=tag_count,
+                    score=round(score, 4),
+                )
+            )
+        # list.sort is stable, so equal scores keep their input order.
+        ranked.sort(key=lambda x: x.score, reverse=True)
+        return ranked[: self._top_tags]
diff --git a/tests/test_content_repository.py b/tests/test_content_repository.py
index 12e6f31..448c7c9 100644
--- a/tests/test_content_repository.py
+++ b/tests/test_content_repository.py
@@ -314,6 +314,43 @@ def test_find_view_counts_by_period_empty_returns_empty_dict() -> None:
     assert result == {}
 
 
+# ── find_by_ids ───────────────────────────────────────────────────────────────
+
+
+def test_find_by_ids_returns_rows() -> None:
+    from datetime import datetime, timezone
+
+    repo, mock_engine = _make_repo()
+    mock_conn = MagicMock()
+    mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)
+    mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False)
+
+    mock_conn.execute.return_value.mappings.return_value.fetchall.return_value = [
+        {
+            "id": "cid-1",
+            "title": "글1",
+            "translated_title": None,
+            "category": "Backend",
+            "tags": '["python"]',
+            "source_id": "src-1",
+            "published_at": datetime(2026, 4, 14, tzinfo=timezone.utc),
+        }
+    ]
+
+    result = repo.find_by_ids(["cid-1"])
+
+    assert len(result) == 1
+    assert result[0]["id"] == "cid-1"
+    assert result[0]["category"] == "Backend"
+    call_params = mock_conn.execute.call_args.args[1]
+    assert call_params["ids"] == ["cid-1"]
+
+
+def test_find_by_ids_empty_returns_empty_list() -> None:
+    repo, _ = _make_repo()
+    assert repo.find_by_ids([]) == []
+
+
 # ── source 자동 생성 ──────────────────────────────────────────────────────────
 
 
diff --git a/tests/test_trend_ranking.py b/tests/test_trend_ranking.py
new file mode 100644
index 0000000..5739779
--- /dev/null
+++ b/tests/test_trend_ranking.py
@@ -0,0 +1,119 @@
+"""Unit tests for TrendRanker (DP-383)."""
+
+from __future__ import annotations
+
+import math
+
+from app.services.trend.frequency import TagFrequency
+from app.services.trend.ranking import TrendRanker
+
+
+def _ranker() -> TrendRanker:
+    return TrendRanker(top_contents=5, top_tags=10)
+
+
+def _tf(
+    keyword: str,
+    cur: int = 3,
+    prev: int = 3,
+    delta: int = 0,
+    growth_rate: float | None = 0.0,
+    state: str = "same",
+) -> TagFrequency:
+    return TagFrequency(
+        keyword=keyword,
+        cur_count=cur,
+        prev_count=prev,
+        delta=delta,
+        growth_rate=growth_rate,
+        state=state,
+    )
+
+
+def _details(ids: list[str]) -> list[dict]:
+    return [
+        {
+            "id": cid,
+            "title": f"글 {cid}",
+            "translated_title": None,
+            "category": None,
+            "tags": "[]",
+            "source_id": "src-1",
+            "published_at": None,
+        }
+        for cid in ids
+    ]
+
+
+# ── Top 5 contents ───────────────────────────────────────────────────────────
+
+
+def test_rank_contents_sorted_by_view_count() -> None:
+    view_counts = {"cid-1": 10, "cid-2": 50, "cid-3": 5}
+    details = _details(["cid-1", "cid-2", "cid-3"])
+    result = _ranker().rank_contents(view_counts, details)
+    assert [r["id"] for r in result] == ["cid-2", "cid-1", "cid-3"]
+
+
+def test_rank_contents_view_count_zero_included() -> None:
+    view_counts = {"cid-1": 0}
+    details = _details(["cid-1"])
+    result = _ranker().rank_contents(view_counts, details)
+    assert len(result) == 1
+    assert result[0]["view_count"] == 0
+
+
+def test_rank_contents_adds_view_count_field() -> None:
+    view_counts = {"cid-1": 7}
+    details = _details(["cid-1"])
+    result = _ranker().rank_contents(view_counts, details)
+    assert result[0]["view_count"] == 7
+
+
+def test_rank_contents_missing_details_do_not_consume_slots() -> None:
+    # "gone-*" ids outrank everything but have no detail row (e.g. unavailable).
+    view_counts = {f"gone-{i}": 100 - i for i in range(5)}
+    view_counts.update({f"cid-{i}": 10 - i for i in range(6)})
+    details = _details([f"cid-{i}" for i in range(6)])
+    result = _ranker().rank_contents(view_counts, details)
+    assert [r["id"] for r in result] == [f"cid-{i}" for i in range(5)]
+
+
+# ── Top 10 tags ──────────────────────────────────────────────────────────────
+
+
+def test_rank_tags_new_bonus() -> None:
+    tf_new = _tf("rust", cur=3, prev=0, delta=3, growth_rate=None, state="new")
+    tf_same = _tf("python", cur=3, prev=3, delta=0, growth_rate=0.0, state="same")
+    result = _ranker().rank_tags([tf_new, tf_same], {})
+    new_tag = next(r for r in result if r.keyword == "rust")
+    same_tag = next(r for r in result if r.keyword == "python")
+    assert new_tag.score > same_tag.score
+    assert new_tag.score == round(0.5 * 3 + 0.5 * 0.0 + 0.5, 4)
+
+
+def test_rank_tags_category_match_bonus() -> None:
+    tf = _tf("backend", cur=5, prev=3, delta=2, growth_rate=2.0, state="up")
+    summary_meta = {"cid-1": {"tags": ["backend"], "category": "backend"}}
+    result = _ranker().rank_tags([tf], summary_meta)
+    assert result[0].tag_count == 7  # 5 + 2
+    assert result[0].score == round(0.5 * 2 + 0.5 * 2.0 + 2.0, 4)
+
+
+def test_rank_tags_growth_rate_none_safe() -> None:
+    tf = _tf("new-tech", cur=4, prev=0, delta=4, growth_rate=None, state="new")
+    result = _ranker().rank_tags([tf], {})
+    assert len(result) == 1
+    assert isinstance(result[0].score, float)
+    assert not math.isnan(result[0].score)
+
+
+def test_rank_tags_sorted_by_score() -> None:
+    tags = [
+        _tf("a", cur=2, prev=1, delta=1, growth_rate=1.0, state="up"),
+        _tf("b", cur=5, prev=3, delta=2, growth_rate=2.0, state="up"),
+        _tf("c", cur=3, prev=3, delta=0, growth_rate=0.0, state="same"),
+    ]
+    result = _ranker().rank_tags(tags, {})
+    scores = [r.score for r in result]
+    assert scores == sorted(scores, reverse=True)