yayajjiang · yayajjiang · May 17, 2026
diff --git a/src/lib/daily.ts b/src/lib/daily.ts
@@ -17,6 +17,69 @@ export interface DailyPaper {
 }
 
 export const dailyPapers: DailyPaper[] = [
+  {
+    date: "2026-05-17",
+    title: "Beyond Reasoning: Reinforcement Learning Unlocks Parametric Knowledge in LLMs",
+    titleZh: "超越推理：强化学习激活LLM中的参数知识",
+    authors: "Wanli Yang et al.",
+    arxivId: "2605.07153",
+    tags: ["Reasoning", "RLHF"],
+    why: "RL improves factual recall ~27% in zero-shot QA without CoT — by redistributing probability mass over existing knowledge, not acquiring new facts. Challenges the 'RL = reasoning' framing.",
+    whyZh: "RL在无CoT零样本问答中提升约27%事实召回率，机制是重分配已有知识概率质量而非习得新知，有力挑战「强化学习=推理」叙事。",
+    pick: true,
+  },
+  {
+    date: "2026-05-17",
+    title: "Theoretical Limits of Language Model Alignment",
+    titleZh: "语言模型对齐的理论极限",
+    authors: "Lucas Monteiro Paes et al.",
+    arxivId: "2605.07105",
+    tags: ["Theory", "Alignment"],
+    why: "Apple derives the KL-reward Pareto frontier for alignment in closed form — best-of-N nearly reaches theoretical optimal while PPO and GRPO remain far suboptimal.",
+    whyZh: "Apple推导对齐的信息论KL-奖励帕累托前沿，best-of-N接近理论最优，PPO/GRPO仍大幅低于极限，为对齐研究提供理论锚点。",
+    pick: true,
+  },
+  {
+    date: "2026-05-17",
+    title: "Block-R1: Rethinking the Role of Block Size in Multi-domain Reinforcement Learning for Diffusion Large Language Models",
+    titleZh: "Block-R1：重思多领域强化学习中扩散大语言模型的块尺寸",
+    authors: "Yan Jiang et al.",
+    arxivId: "2605.11726",
+    tags: ["Diffusion LM", "Reasoning", "RLHF"],
+    why: "Domain block-size conflict bottlenecks multi-domain dLLM RL post-training; Block-R1-41K assigns per-sample optimal block sizes to resolve cross-domain tension in GRPO.",
+    whyZh: "块尺寸冲突是多领域dLLM RL后训练的关键瓶颈，Block-R1-41K为每个样本分配最优块尺寸，有效缓解GRPO跨领域训练张力。",
+    pick: true,
+  },
+  {
+    date: "2026-05-17",
+    title: "Break the Block: Dynamic-size Reasoning Blocks for Diffusion LLMs via Monotonic Entropy Descent with RL",
+    titleZh: "打破块限制：单调熵下降强化学习驱动的扩散LLM动态推理块",
+    authors: "Yan Jiang et al.",
+    arxivId: "2605.02263",
+    tags: ["Diffusion LM", "Reasoning", "Efficient Inference"],
+    why: "Fixed block size in dLLMs breaks reasoning coherence; monotonic entropy descent RL learns adaptive block boundaries, improving quality across diverse reasoning tasks.",
+    whyZh: "固定块尺寸破坏扩散LLM推理连贯性，单调熵下降RL框架自适应学习块边界，在多类推理任务中显著提升生成质量。",
+  },
+  {
+    date: "2026-05-17",
+    title: "Continuous Latent Diffusion Language Model",
+    titleZh: "连续潜变量扩散语言模型（Cola DLM）",
+    authors: "Hongcan Guo et al.",
+    arxivId: "2605.06548",
+    tags: ["Diffusion LM", "Theory"],
+    why: "Hierarchical Text VAE + block-causal DiT models text in continuous latent space — bypasses the discrete token bottleneck entirely, opening a new paradigm for diffusion LMs.",
+    whyZh: "分层Text VAE与块因果DiT在连续潜空间中建模文本，彻底绕开离散token瓶颈，为扩散语言模型开辟全新范式。",
+  },
+  {
+    date: "2026-05-17",
+    title: "LatentRAG: Latent Reasoning and Retrieval for Efficient Agentic RAG",
+    titleZh: "LatentRAG：面向高效智能体RAG的隐空间推理与检索",
+    authors: "Yijia Zheng and Marcel Worring",
+    arxivId: "2605.06285",
+    tags: ["RAG", "Agent", "Efficient Inference"],
+    why: "Latent-space reasoning and retrieval replaces token-by-token CoT in agentic RAG — ~90% latency cut across 7 benchmarks while matching explicit reasoning accuracy.",
+    whyZh: "在连续潜空间中完成智能体RAG的推理与检索，完全替代逐token生成，7项基准上匹配显式推理精度，延迟降低约90%。",
+  },
   {
     date: "2026-04-23",
     title: "DeepSeek-V4 Technical Report",