From 47eacaf134f6b34ece23da38a3314876b9ef4b7b Mon Sep 17 00:00:00 2001
From: suheon <dreasm12345@naver.com>
Date: Fri, 24 Apr 2026 14:09:29 +0900
Subject: [PATCH] =?UTF-8?q?feat(trend):=20Top=205=20=EC=BD=98=ED=85=90?=
 =?UTF-8?q?=EC=B8=A0=20LLM=20=EC=84=9C=EC=82=AC=20=EC=9A=94=EC=95=BD=20?=
 =?UTF-8?q?=EA=B5=AC=ED=98=84=20(DP-404)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- SummaryRepository.find_summaries_for_trend(): mid 레벨 one_line_summary + keywords + tags + category 배치 조회
- app/core/prompts/trend_top_posts.py: 서사 요약 프롬프트 + 일단위 시간 범위 레이블(M월 D일 HH시~HH시) + 이전 기간 비교 섹션
- TopPostsSummaryGenerator.generate(): prev_summary 주입으로 이전 기간 차이점 서술 지원

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 app/core/prompts/trend_top_posts.py     | 103 +++++++++++++++
 app/repositories/summary_repository.py  |  30 +++++
 app/services/trend/top_posts_summary.py |  96 ++++++++++++++
 tests/test_summary_repository.py        |  53 ++++++++
 tests/test_trend_top_posts_summary.py   | 167 ++++++++++++++++++++++++
 5 files changed, 449 insertions(+)
 create mode 100644 app/core/prompts/trend_top_posts.py
 create mode 100644 app/services/trend/top_posts_summary.py
 create mode 100644 tests/test_trend_top_posts_summary.py

diff --git a/app/core/prompts/trend_top_posts.py b/app/core/prompts/trend_top_posts.py
new file mode 100644
index 0000000..51892e0
--- /dev/null
+++ b/app/core/prompts/trend_top_posts.py
@@ -0,0 +1,103 @@
+"""Top 5 콘텐츠 주제 흐름 서사 요약 프롬프트 (DP-404)."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+SYSTEM_PROMPT = """\
+당신은 개발자 커뮤니티 동향 분석 전문가입니다.
+
+이번 기간 가장 많이 조회된 글 정보를 바탕으로,
+개발자들이 어떤 주제와 기술에 관심을 가졌는지 1~3 문단의 서사 요약을 작성합니다.
+
+## 작성 원칙
+- 특정 글을 "1위", "N번째 글" 등으로 직접 지칭하지 않습니다
+- 조회수 숫자를 직접 언급하지 않습니다
+- 광고성·추천성 표현을 사용하지 않습니다
+- "프론트엔드 생태계 비교", "Spring + Kotlin 조합에 대한 관심" 같은 주제 군집 표현을 씁니다
+- 200~400자 내외로 작성합니다
+- 추천 액션은 출력하지 않습니다 (Insight/주간 리포트와 역할 분리)
+- 이전 기간 요약이 제공된 경우, 마지막 문단에서 이전 기간과의 주제 변화를 자연스럽게 서술합니다
+  - 예: "이번 주는 ~~한 흐름이 두드러지며, 지난 기간 ~~했던 것과 달리 ~~쪽으로 관심이 이동했습니다."
+- 이전 기간 요약이 없으면 현재 기간만 서술합니다
+"""
+
+TOOL_SAVE_TOP_POSTS_SUMMARY = {
+    "name": "save_top_posts_summary",
+    "description": "Top 5 글의 주제 흐름 서사 요약을 저장한다.",
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "top_posts_summary": {
+                "type": "string",
+                "description": "Top 5 글의 주제 흐름 서사 요약 (200~400자, 1~3 문단)",
+            }
+        },
+        "required": ["top_posts_summary"],
+    },
+}
+
+
+def _format_daily_label(period_start: str, period_end: str) -> str:
+    try:
+        s = datetime.fromisoformat(period_start)
+        e = datetime.fromisoformat(period_end)
+        return f"{s.month}월 {s.day}일 {s.hour}시~{e.hour}시"
+    except (ValueError, TypeError):
+        return "오늘"
+
+
+def build_user_prompt(
+    top_contents: list[dict],
+    summary_meta: dict[str, dict],
+    period_start: str = "",
+    period_end: str = "",
+    unit: str = "weekly",
+    prev_summary: str | None = None,
+) -> str:
+    if unit == "daily":
+        period_label = _format_daily_label(period_start, period_end)
+    elif unit == "weekly":
+        period_label = "이번 주"
+    elif unit == "monthly":
+        period_label = "이번 달"
+    else:
+        period_label = "이번 기간"
+
+    lines: list[str] = [f"기간: {period_label}", ""]
+    lines.append("## 이번 기간 주목받은 글 (조회수 상위 5편)")
+    lines.append("")
+
+    for idx, content in enumerate(top_contents, 1):
+        cid = content.get("id", "")
+        meta = summary_meta.get(cid, {})
+        title = content.get("translated_title") or content.get("title", "")
+        category = content.get("category") or ""
+        tags = content.get("tags") or []
+        if isinstance(tags, str):
+            import json
+
+            try:
+                tags = json.loads(tags)
+            except (ValueError, TypeError):
+                tags = []
+        keywords = meta.get("keywords", [])
+        one_line_summary = meta.get("one_line_summary", "")
+
+        lines.append(f"[{idx}] 제목: {title}")
+        if category:
+            lines.append(f"    분류: {category}")
+        if tags:
+            lines.append(f"    태그: {', '.join(tags)}")
+        if keywords:
+            lines.append(f"    핵심 키워드: {', '.join(keywords)}")
+        if one_line_summary:
+            lines.append(f"    한줄 요약: {one_line_summary}")
+        lines.append("")
+
+    if prev_summary:
+        lines.append("## 이전 기간 요약 (참고용)")
+        lines.append(prev_summary)
+        lines.append("")
+
+    return "\n".join(lines).rstrip()
diff --git a/app/repositories/summary_repository.py b/app/repositories/summary_repository.py
index 63eabbb..a417f45 100644
--- a/app/repositories/summary_repository.py
+++ b/app/repositories/summary_repository.py
@@ -131,6 +131,36 @@ def find_all_levels(self, content_id: str) -> list[dict]:
         )
         return resp.get("Items", [])
 
+    def find_summaries_for_trend(self, content_ids: list[str]) -> dict[str, dict]:
+        """트렌드 top_posts_summary 용 mid 레벨 요약 메타 배치 조회.
+
+        Returns:
+            {content_id: {one_line_summary, keywords, tags, category}}
+        누락된 속성은 빈 문자열/빈 리스트로 채우며, 조회되지 않은 content_id 는 결과 dict 에 포함되지 않는다.
+        """
+        if not content_ids:
+            return {}
+
+        keys = [{"content_id": cid, "level": "mid"} for cid in content_ids]
+        result: dict[str, dict] = {}
+
+        for i in range(0, len(keys), 100):
+            chunk = keys[i : i + 100]
+            resp = self._dynamodb.batch_get_item(
+                RequestItems={self._table_name: {"Keys": chunk}}
+            )
+            for item in resp.get("Responses", {}).get(self._table_name, []):
+                cid = item.get("content_id")
+                if cid:
+                    result[cid] = {
+                        "one_line_summary": item.get("one_line_summary", ""),
+                        "keywords": list(item.get("keywords", [])),
+                        "tags": list(item.get("tags", [])),
+                        "category": item.get("category", ""),
+                    }
+
+        return result
+
     def find_by_content_ids(self, content_ids: list[str]) -> list[dict]:
         """여러 content_id의 요약을 조회한다.
 
diff --git a/app/services/trend/top_posts_summary.py b/app/services/trend/top_posts_summary.py
new file mode 100644
index 0000000..d45cbfa
--- /dev/null
+++ b/app/services/trend/top_posts_summary.py
@@ -0,0 +1,96 @@
+"""Top 5 콘텐츠 주제 흐름 서사 요약 생성 서비스 (DP-404)."""
+
+from __future__ import annotations
+
+import logging
+
+import boto3
+from botocore.config import Config
+from botocore.exceptions import ClientError, EndpointConnectionError, ReadTimeoutError
+
+from app.core.bedrock import to_tool_config
+from app.core.exceptions import AIInternalError, AITimeoutError, AIUpstreamError
+from app.core.prompts.trend_top_posts import (
+    SYSTEM_PROMPT,
+    TOOL_SAVE_TOP_POSTS_SUMMARY,
+    build_user_prompt,
+)
+from app.repositories.summary_repository import SummaryRepository
+
+logger = logging.getLogger(__name__)
+
+_TOOL_NAME = "save_top_posts_summary"
+
+
+class TopPostsSummaryGenerator:
+    """기간 조회수 Top 5 콘텐츠의 주제 흐름을 LLM이 서사 요약한다."""
+
+    def __init__(
+        self,
+        aws_region: str = "ap-northeast-2",
+        model: str = "global.anthropic.claude-sonnet-4-6",
+        summary_repo: SummaryRepository | None = None,
+    ) -> None:
+        self._client = boto3.client(
+            "bedrock-runtime",
+            region_name=aws_region,
+            config=Config(read_timeout=120, retries={"max_attempts": 0}),
+        )
+        self._model = model
+        self._summary_repo = summary_repo or SummaryRepository(aws_region=aws_region)
+
+    def generate(
+        self,
+        top_contents: list[dict],
+        unit: str,
+        period_start: str = "",
+        period_end: str = "",
+        prev_summary: str | None = None,
+    ) -> str | None:
+        """Top 5 콘텐츠 주제 흐름을 LLM이 서사 요약한다.
+
+        top_contents 빈 리스트 → None 반환.
+        LLM 실패 → AITimeoutError / AIUpstreamError 를 raise (None 으로 삼키지 않음, 호출자가 처리).
+        prev_summary: 이전 기간 top_posts_summary — 있으면 프롬프트에 주입해 차이점 서술 유도.
+        """
+        if not top_contents:
+            return None
+
+        content_ids = [c["id"] for c in top_contents]
+        summary_meta = self._summary_repo.find_summaries_for_trend(content_ids)
+        user_prompt = build_user_prompt(
+            top_contents, summary_meta, period_start, period_end, unit, prev_summary
+        )
+
+        try:
+            response = self._client.converse(
+                modelId=self._model,
+                system=[
+                    {
+                        "text": SYSTEM_PROMPT,
+                        "cacheControl": {"type": "ephemeral"},
+                    }
+                ],
+                messages=[{"role": "user", "content": [{"text": user_prompt}]}],
+                toolConfig=to_tool_config(TOOL_SAVE_TOP_POSTS_SUMMARY, _TOOL_NAME),
+                inferenceConfig={"maxTokens": 1024, "temperature": 0.4},
+            )
+        except ReadTimeoutError as exc:
+            logger.warning("top_posts_summary 생성 타임아웃 — skip: %s", exc)
+            raise AITimeoutError() from exc
+        except EndpointConnectionError as exc:
+            logger.warning("top_posts_summary 생성 연결 실패 — skip: %s", exc)
+            raise AIUpstreamError() from exc
+        except ClientError as exc:
+            code = exc.response.get("Error", {}).get("Code", "")
+            if "ThrottlingException" in code or "ServiceUnavailable" in code:
+                logger.warning("top_posts_summary Rate Limit — skip: %s", exc)
+                raise AIUpstreamError() from exc
+            raise AIUpstreamError(str(exc)) from exc
+
+        content_blocks = response["output"]["message"]["content"]
+        tool_use_block = next((b for b in content_blocks if "toolUse" in b), None)
+        if not tool_use_block:
+            raise AIInternalError("tool_use 블록 없음")
+
+        return tool_use_block["toolUse"]["input"].get("top_posts_summary")
diff --git a/tests/test_summary_repository.py b/tests/test_summary_repository.py
index 79218c2..62763c0 100644
--- a/tests/test_summary_repository.py
+++ b/tests/test_summary_repository.py
@@ -162,6 +162,59 @@ def test_find_meta_by_content_ids_chunks_over_100() -> None:
     assert mock_dynamodb.batch_get_item.call_count == 2
 
 
+# ── find_summaries_for_trend ─────────────────────────────────────────────────
+
+
+def test_find_summaries_for_trend_returns_mid_level() -> None:
+    with patch("boto3.resource") as mock_resource:
+        mock_dynamodb = MagicMock()
+        mock_resource.return_value = mock_dynamodb
+        mock_dynamodb.Table.return_value = MagicMock()
+        instance = SummaryRepository(aws_region="us-east-1")
+    instance._dynamodb = mock_dynamodb
+
+    mock_dynamodb.batch_get_item.return_value = {
+        "Responses": {
+            "ai_summaries": [
+                {
+                    "content_id": "cid-1",
+                    "level": "mid",
+                    "one_line_summary": "Redis TTL 설정 전략",
+                    "keywords": ["TTL", "캐시"],
+                    "tags": ["Redis", "Backend"],
+                    "category": "Backend",
+                }
+            ]
+        }
+    }
+
+    result = instance.find_summaries_for_trend(["cid-1"])
+
+    assert result["cid-1"]["one_line_summary"] == "Redis TTL 설정 전략"
+    assert result["cid-1"]["keywords"] == ["TTL", "캐시"]
+    assert result["cid-1"]["tags"] == ["Redis", "Backend"]
+    assert result["cid-1"]["category"] == "Backend"
+    # mid 레벨 키로 조회했는지 확인
+    call_keys = mock_dynamodb.batch_get_item.call_args.kwargs["RequestItems"][
+        "ai_summaries"
+    ]["Keys"]
+    assert all(k["level"] == "mid" for k in call_keys)
+
+
+def test_find_summaries_for_trend_empty_input_returns_empty() -> None:
+    with patch("boto3.resource") as mock_resource:
+        mock_dynamodb = MagicMock()
+        mock_resource.return_value = mock_dynamodb
+        mock_dynamodb.Table.return_value = MagicMock()
+        instance = SummaryRepository(aws_region="us-east-1")
+    instance._dynamodb = mock_dynamodb
+
+    result = instance.find_summaries_for_trend([])
+
+    assert result == {}
+    mock_dynamodb.batch_get_item.assert_not_called()
+
+
 def test_save_all_levels_includes_created_at_if_not_exists(
     repo: SummaryRepository, mock_table: MagicMock
 ) -> None:
diff --git a/tests/test_trend_top_posts_summary.py b/tests/test_trend_top_posts_summary.py
new file mode 100644
index 0000000..a802aa3
--- /dev/null
+++ b/tests/test_trend_top_posts_summary.py
@@ -0,0 +1,167 @@
+"""TopPostsSummaryGenerator 단위 테스트 — mock 기반, 실제 API 호출 없음 (DP-404)."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from botocore.exceptions import EndpointConnectionError
+
+from app.core.exceptions import AIUpstreamError
+from app.services.trend.top_posts_summary import TopPostsSummaryGenerator
+
+
+def _make_generator(
+    summary_text: str = "이번 주 트렌드 요약",
+) -> tuple[TopPostsSummaryGenerator, MagicMock]:
+    """mock Bedrock 클라이언트와 mock SummaryRepo를 주입한 Generator를 반환한다."""
+    mock_repo = MagicMock()
+    mock_repo.find_summaries_for_trend.return_value = {}
+
+    with patch("boto3.client"):
+        gen = TopPostsSummaryGenerator(aws_region="us-east-1", summary_repo=mock_repo)
+
+    mock_client = MagicMock()
+    mock_client.converse.return_value = {
+        "output": {
+            "message": {
+                "content": [
+                    {
+                        "toolUse": {
+                            "toolUseId": "tool-1",
+                            "name": "save_top_posts_summary",
+                            "input": {"top_posts_summary": summary_text},
+                        }
+                    }
+                ]
+            }
+        }
+    }
+    gen._client = mock_client
+    return gen, mock_client
+
+
+def _make_top_contents(n: int = 2) -> list[dict]:
+    return [
+        {
+            "id": f"cid-{i}",
+            "title": f"글 {i}",
+            "translated_title": None,
+            "category": "Backend",
+            "tags": '["Python"]',
+            "source_id": "src-1",
+            "published_at": None,
+        }
+        for i in range(1, n + 1)
+    ]
+
+
+# ── 정상 흐름 ─────────────────────────────────────────────────────────────────
+
+
+def test_generate_returns_summary_on_success() -> None:
+    gen, _ = _make_generator("이번 주 트렌드 요약")
+
+    result = gen.generate(
+        top_contents=_make_top_contents(),
+        unit="weekly",
+        period_start="2026-04-14",
+        period_end="2026-04-21",
+    )
+
+    assert result == "이번 주 트렌드 요약"
+
+
+# ── 빈 입력 ───────────────────────────────────────────────────────────────────
+
+
+def test_generate_returns_none_for_empty_contents() -> None:
+    gen, mock_client = _make_generator()
+
+    result = gen.generate(top_contents=[], unit="weekly")
+
+    assert result is None
+    mock_client.converse.assert_not_called()
+
+
+# ── LLM 실패 ─────────────────────────────────────────────────────────────────
+
+
+def test_generate_raises_ai_upstream_error_on_llm_failure() -> None:
+    gen, mock_client = _make_generator()
+    mock_client.converse.side_effect = EndpointConnectionError(
+        endpoint_url="http://test"
+    )
+
+    with pytest.raises(AIUpstreamError):
+        gen.generate(top_contents=_make_top_contents(), unit="weekly")
+
+
+# ── daily period label ────────────────────────────────────────────────────────
+
+
+def test_generate_daily_period_label_uses_time_range() -> None:
+    gen, mock_client = _make_generator()
+
+    gen.generate(
+        top_contents=_make_top_contents(),
+        unit="daily",
+        period_start="2026-04-24T08:00:00",
+        period_end="2026-04-24T16:00:00",
+    )
+
+    call_messages = mock_client.converse.call_args.kwargs["messages"]
+    user_text = call_messages[0]["content"][0]["text"]
+    assert "4월 24일 8시~16시" in user_text
+
+
+# ── prev_summary 주입 ─────────────────────────────────────────────────────────
+
+
+def test_generate_with_prev_summary_includes_in_prompt() -> None:
+    gen, mock_client = _make_generator()
+    prev = "지난 주에는 Kubernetes 관련 글이 주목받았습니다."
+
+    gen.generate(
+        top_contents=_make_top_contents(),
+        unit="weekly",
+        prev_summary=prev,
+    )
+
+    call_messages = mock_client.converse.call_args.kwargs["messages"]
+    user_text = call_messages[0]["content"][0]["text"]
+    assert "이전 기간 요약" in user_text
+    assert prev in user_text
+
+
+# ── one_line_summary 누락 ─────────────────────────────────────────────────────
+
+
+def test_generate_handles_missing_one_line_summary() -> None:
+    mock_repo = MagicMock()
+    mock_repo.find_summaries_for_trend.return_value = {}
+
+    with patch("boto3.client"):
+        gen = TopPostsSummaryGenerator(aws_region="us-east-1", summary_repo=mock_repo)
+
+    mock_client = MagicMock()
+    mock_client.converse.return_value = {
+        "output": {
+            "message": {
+                "content": [
+                    {
+                        "toolUse": {
+                            "toolUseId": "tool-1",
+                            "name": "save_top_posts_summary",
+                            "input": {"top_posts_summary": "요약 결과"},
+                        }
+                    }
+                ]
+            }
+        }
+    }
+    gen._client = mock_client
+
+    result = gen.generate(top_contents=_make_top_contents(), unit="weekly")
+
+    assert result == "요약 결과"