diff --git a/src/lib/daily.ts b/src/lib/daily.ts index 5f0cca8..1d512da 100644 --- a/src/lib/daily.ts +++ b/src/lib/daily.ts @@ -17,6 +17,69 @@ export interface DailyPaper { } export const dailyPapers: DailyPaper[] = [ + { + date: "2026-05-17", + title: "Beyond Reasoning: Reinforcement Learning Unlocks Parametric Knowledge in LLMs", + titleZh: "超越推理:强化学习激活LLM中的参数知识", + authors: "Wanli Yang et al.", + arxivId: "2605.07153", + tags: ["Reasoning", "RLHF"], + why: "RL improves factual recall ~27% in zero-shot QA without CoT — by redistributing probability mass over existing knowledge, not acquiring new facts. Challenges the 'RL = reasoning' framing.", + whyZh: "RL在无CoT零样本问答中提升约27%事实召回率,机制是重分配已有知识概率质量而非习得新知,有力挑战「强化学习=推理」叙事。", + pick: true, + }, + { + date: "2026-05-17", + title: "Theoretical Limits of Language Model Alignment", + titleZh: "语言模型对齐的理论极限", + authors: "Lucas Monteiro Paes et al.", + arxivId: "2605.07105", + tags: ["Theory", "Alignment"], + why: "Apple derives the KL-reward Pareto frontier for alignment in closed form — best-of-N nearly reaches theoretical optimal while PPO and GRPO remain far suboptimal.", + whyZh: "Apple推导对齐的信息论KL-奖励帕累托前沿,best-of-N接近理论最优,PPO/GRPO仍大幅低于极限,为对齐研究提供理论锚点。", + pick: true, + }, + { + date: "2026-05-17", + title: "Block-R1: Rethinking the Role of Block Size in Multi-domain Reinforcement Learning for Diffusion Large Language Models", + titleZh: "Block-R1:重思多领域强化学习中扩散大语言模型的块尺寸", + authors: "Yan Jiang et al.", + arxivId: "2605.11726", + tags: ["Diffusion LM", "Reasoning", "RLHF"], + why: "Domain block-size conflict bottlenecks multi-domain dLLM RL post-training; Block-R1-41K assigns per-sample optimal block sizes to resolve cross-domain tension in GRPO.", + whyZh: "块尺寸冲突是多领域dLLM RL后训练的关键瓶颈,Block-R1-41K为每个样本分配最优块尺寸,有效缓解GRPO跨领域训练张力。", + pick: true, + }, + { + date: "2026-05-17", + title: "Break the Block: Dynamic-size Reasoning Blocks for Diffusion LLMs via Monotonic Entropy Descent with RL", + titleZh: "打破块限制:单调熵下降强化学习驱动的扩散LLM动态推理块", + authors: "Yan Jiang et al.", + arxivId: "2605.02263", + tags: ["Diffusion LM", "Reasoning", "Efficient Inference"], + why: "Fixed block size in dLLMs breaks reasoning coherence; monotonic entropy descent RL learns adaptive block boundaries, improving quality across diverse reasoning tasks.", + whyZh: "固定块尺寸破坏扩散LLM推理连贯性,单调熵下降RL框架自适应学习块边界,在多类推理任务中显著提升生成质量。", + }, + { + date: "2026-05-17", + title: "Continuous Latent Diffusion Language Model", + titleZh: "连续潜变量扩散语言模型(Cola DLM)", + authors: "Hongcan Guo et al.", + arxivId: "2605.06548", + tags: ["Diffusion LM", "Theory"], + why: "Hierarchical Text VAE + block-causal DiT models text in continuous latent space — bypasses the discrete token bottleneck entirely, opening a new paradigm for diffusion LMs.", + whyZh: "分层Text VAE与块因果DiT在连续潜空间中建模文本,彻底绕开离散token瓶颈,为扩散语言模型开辟全新范式。", + }, + { + date: "2026-05-17", + title: "LatentRAG: Latent Reasoning and Retrieval for Efficient Agentic RAG", + titleZh: "LatentRAG:面向高效智能体RAG的隐空间推理与检索", + authors: "Yijia Zheng and Marcel Worring", + arxivId: "2605.06285", + tags: ["RAG", "Agent", "Efficient Inference"], + why: "Latent-space reasoning and retrieval replaces token-by-token CoT in agentic RAG — ~90% latency cut across 7 benchmarks while matching explicit reasoning accuracy.", + whyZh: "在连续潜空间中完成智能体RAG的推理与检索,完全替代逐token生成,7项基准上匹配显式推理精度,延迟降低约90%。", + }, { date: "2026-04-23", title: "DeepSeek-V4 Technical Report",