Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,69 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-05-17",
title: "Beyond Reasoning: Reinforcement Learning Unlocks Parametric Knowledge in LLMs",
titleZh: "超越推理:强化学习激活LLM中的参数知识",
authors: "Wanli Yang et al.",
arxivId: "2605.07153",
tags: ["Reasoning", "RLHF"],
why: "RL improves factual recall ~27% in zero-shot QA without CoT — by redistributing probability mass over existing knowledge, not acquiring new facts. Challenges the 'RL = reasoning' framing.",
whyZh: "RL在无CoT零样本问答中提升约27%事实召回率,机制是重分配已有知识概率质量而非习得新知,有力挑战「强化学习=推理」叙事。",
pick: true,
},
{
date: "2026-05-17",
title: "Theoretical Limits of Language Model Alignment",
titleZh: "语言模型对齐的理论极限",
authors: "Lucas Monteiro Paes et al.",
arxivId: "2605.07105",
tags: ["Theory", "Alignment"],
why: "Apple derives the KL-reward Pareto frontier for alignment in closed form — best-of-N nearly reaches theoretical optimal while PPO and GRPO remain far suboptimal.",
whyZh: "Apple推导对齐的信息论KL-奖励帕累托前沿,best-of-N接近理论最优,PPO/GRPO仍大幅低于极限,为对齐研究提供理论锚点。",
pick: true,
},
{
date: "2026-05-17",
title: "Block-R1: Rethinking the Role of Block Size in Multi-domain Reinforcement Learning for Diffusion Large Language Models",
titleZh: "Block-R1:重思多领域强化学习中扩散大语言模型的块尺寸",
authors: "Yan Jiang et al.",
arxivId: "2605.11726",
tags: ["Diffusion LM", "Reasoning", "RLHF"],
why: "Domain block-size conflict bottlenecks multi-domain dLLM RL post-training; Block-R1-41K assigns per-sample optimal block sizes to resolve cross-domain tension in GRPO.",
whyZh: "块尺寸冲突是多领域dLLM RL后训练的关键瓶颈,Block-R1-41K为每个样本分配最优块尺寸,有效缓解GRPO跨领域训练张力。",
pick: true,
},
{
date: "2026-05-17",
title: "Break the Block: Dynamic-size Reasoning Blocks for Diffusion LLMs via Monotonic Entropy Descent with RL",
titleZh: "打破块限制:单调熵下降强化学习驱动的扩散LLM动态推理块",
authors: "Yan Jiang et al.",
arxivId: "2605.02263",
tags: ["Diffusion LM", "Reasoning", "Efficient Inference"],
why: "Fixed block size in dLLMs breaks reasoning coherence; monotonic entropy descent RL learns adaptive block boundaries, improving quality across diverse reasoning tasks.",
whyZh: "固定块尺寸破坏扩散LLM推理连贯性,单调熵下降RL框架自适应学习块边界,在多类推理任务中显著提升生成质量。",
},
{
date: "2026-05-17",
title: "Continuous Latent Diffusion Language Model",
titleZh: "连续潜变量扩散语言模型(Cola DLM)",
authors: "Hongcan Guo et al.",
arxivId: "2605.06548",
tags: ["Diffusion LM", "Theory"],
why: "Hierarchical Text VAE + block-causal DiT models text in continuous latent space — bypasses the discrete token bottleneck entirely, opening a new paradigm for diffusion LMs.",
whyZh: "分层Text VAE与块因果DiT在连续潜空间中建模文本,彻底绕开离散token瓶颈,为扩散语言模型开辟全新范式。",
},
{
date: "2026-05-17",
title: "LatentRAG: Latent Reasoning and Retrieval for Efficient Agentic RAG",
titleZh: "LatentRAG:面向高效智能体RAG的隐空间推理与检索",
authors: "Yijia Zheng and Marcel Worring",
arxivId: "2605.06285",
tags: ["RAG", "Agent", "Efficient Inference"],
why: "Latent-space reasoning and retrieval replaces token-by-token CoT in agentic RAG — ~90% latency cut across 7 benchmarks while matching explicit reasoning accuracy.",
whyZh: "在连续潜空间中完成智能体RAG的推理与检索,完全替代逐token生成,7项基准上匹配显式推理精度,延迟降低约90%。",
},
{
date: "2026-04-23",
title: "DeepSeek-V4 Technical Report",
Expand Down