[{"data":1,"prerenderedAt":343},["ShallowReactive",2],{"header-counts":3,"footer-counts":6,"wiki-context-rot":9},{"tools":4,"reviews":5},70,12,{"tools":4,"reviews":5,"playbooks":7,"news":8},15,13,{"id":10,"title":11,"body":12,"category":321,"description":310,"extension":322,"meta":323,"navigation":324,"path":325,"published":326,"relatedModels":327,"relatedTools":330,"seo":333,"slug":334,"stem":335,"summary":336,"tags":337,"updated":326,"__hash__":342},"wiki\u002Fwiki\u002Fcontext-rot.md","Context Rot（上下文腐烂）",{"type":13,"value":14,"toc":309},"minimark",[15,20,33,40,95,98,101,104,112,119,122,125,153,156,159,162,201,209,212,242,245,271,274],[16,17,19],"h2",{"id":18},"什么是-context-rot","什么是 Context Rot",[21,22,23,24,28,29,32],"p",{},"Context Rot（上下文腐烂）指",[25,26,27],"strong",{},"随着输入 token 增多，LLM 输出质量持续下降","的现象——而且是在远没到上下文窗口上限时就开始下降。这个词由 Chroma 在 2025 年的技术报告中提出并系统验证：他们测了 18 个前沿模型（含 GPT-4.1、Claude 4、Gemini 2.5、Qwen3），",[25,30,31],{},"没有一个例外","，全都随输入变长而变差。",[21,34,35,36,39],{},"关键区分：Context Rot ",[25,37,38],{},"不是","上下文溢出。",[41,42,43,58],"table",{},[44,45,46],"thead",{},[47,48,49,52,55],"tr",{},[50,51],"th",{},[50,53,54],{},"上下文溢出",[50,56,57],{},"Context Rot",[59,60,61,73,84],"tbody",{},[47,62,63,67,70],{},[64,65,66],"td",{},"触发",[64,68,69],{},"超过模型最大 token 上限",[64,71,72],{},"远未到上限就发生",[47,74,75,78,81],{},[64,76,77],{},"表现",[64,79,80],{},"截断\u002F拒绝，二元失败",[64,82,83],{},"渐进式质量下降",[47,85,86,89,92],{},[64,87,88],{},"例子",[64,90,91],{},"200K 窗口塞 210K",[64,93,94],{},"200K 窗口塞 50K 就开始退化",[21,96,97],{},"一个 200K 窗口的模型，可能在 50K token 时就出现明显退化。下降是连续的，不是悬崖式的。",[16,99,100],{"id":100},"为什么重要",[21,102,103],{},"很多团队的误区是「我选了 128K \u002F 1M 上下文的模型，应该够用了」，然后塞满上下文却纳闷质量为什么变差。Context Rot 的核心洞察是：",[105,106,107],"blockquote",{},[21,108,109],{},[25,110,111],{},"容量是错误的指标，信噪比才决定输出质量。",[21,113,114,115,118],{},"对编码 agent 来说这尤其致命——Context Rot 是 agent 的",[25,116,117],{},"首要失败模式","，比模型能力、推理能力更关键。模型本身够聪明能解决问题，前提是上下文保持干净。但 agent 在搜索、探索、回溯过程中会不断累积噪声，这些噪声直接拖垮后续每一步的输出。",[16,120,121],{"id":121},"表现形式",[21,123,124],{},"Chroma 的研究揭示了几个非均匀退化的维度：",[126,127,128,135,141,147],"ul",{},[129,130,131,134],"li",{},[25,132,133],{},"needle-question 相似度","：问题和目标信息的语义匹配度越低，越长的上下文越捞不出来（经典 NIAH 测的是字面匹配，掩盖了这个问题）",[129,136,137,140],{},[25,138,139],{},"干扰项（distractors）","：上下文里有相似但无关的内容时，越长越容易被带偏",[129,142,143,146],{},[25,144,145],{},"lost-in-the-middle","：信息放在长上下文中间时最容易被忽略，准确率可掉 30%+",[129,148,149,152],{},[25,150,151],{},"噪声类型有别","：互相抵消的操作（如成对的增删）比单纯的无关文本（如 print 语句）更严重地拖垮性能",[21,154,155],{},"社区的真实观察也印证这点：Claude Code 在多次 compaction 后会越来越差；与其让模型在长上下文里硬找，不如先让它总结、再基于总结提问、需要时再喂原文（RAG 式或简单 agent 循环）效果更好。",[16,157,158],{"id":158},"怎么缓解",[21,160,161],{},"Context Rot 是「上下文工程」存在的根本原因。常见手段：",[163,164,165,171,183,189,195],"ol",{},[129,166,167,170],{},[25,168,169],{},"控制信噪比","：只放当前任务真正需要的内容，无关历史及时清掉",[129,172,173,176,177,182],{},[25,174,175],{},"RAG 检索","：用 ",[178,179,181],"a",{"href":180},"\u002Fwiki\u002Frag.html","RAG"," 按需取相关片段，而不是一股脑全塞进去",[129,184,185,188],{},[25,186,187],{},"总结压缩","：长对话\u002F长文档先总结，基于总结工作，需要细节时再拉原文",[129,190,191,194],{},[25,192,193],{},"分而治之","：把大任务拆成多个干净上下文的子任务（多 agent 编排实测比单 agent 提升显著）",[129,196,197,200],{},[25,198,199],{},"主动清理","：agent 探索产生的噪声（失败的尝试、无关的文件内容）用完就清，别让它留在上下文里腐烂",[21,202,203,204,208],{},"这些都属于 ",[178,205,207],{"href":206},"\u002Fwiki\u002Fcontext-engineering.html","Context Engineering"," 的范畴。",[16,210,211],{"id":211},"对模型选型的启示",[126,213,214,220,226],{},[129,215,216,219],{},[25,217,218],{},"别只看上下文窗口大小","：1M 窗口不等于能有效用满 1M。看的是模型在长上下文下的实际保持能力。",[129,221,222,225],{},[25,223,224],{},"长上下文 ≠ 不需要 RAG","：「长上下文会杀死 RAG」是误解。恰恰相反，Context Rot 证明了即便有大窗口，按需检索 + 控制信噪比仍是刚需。",[129,227,228,231,232,236,237,241],{},[25,229,230],{},"compaction 不是免费的","：",[178,233,235],{"href":234},"\u002Fmodels\u002Fclaude-opus-4-5.html","Claude Opus 4.5","、",[178,238,240],{"href":239},"\u002Fmodels\u002Fgpt-5-1-codex-max.html","GPT-5.1-Codex-Max"," 的 compaction 机制能延长工作时长，但每次压缩都可能损失信息，长任务质量仍会缓慢下滑。",[16,243,244],{"id":244},"常见误区",[126,246,247,253,259,265],{},[129,248,249,252],{},[25,250,251],{},"「上下文没满就没事」","：错。退化在远未满时就开始，容量充足不代表质量稳定。",[129,254,255,258],{},[25,256,257],{},"「换个更大窗口的模型就行」","：错。Chroma 测的 18 个模型全部退化，换大窗口治标不治本，得做上下文工程。",[129,260,261,264],{},[25,262,263],{},"「RAG 过时了」","：错。Context Rot 反而强化了 RAG 和上下文工程的价值。",[129,266,267,270],{},[25,268,269],{},"「把所有文档一次喂进去最省事」","：短期省事，长期质量差。先总结后按需取原文通常更准。",[16,272,273],{"id":273},"延伸阅读",[126,275,276,282,292,300],{},[129,277,278,279,281],{},"核心方法：",[178,280,207],{"href":206},"——怎么给模型喂对上下文",[129,283,284,285,287,288],{},"检索：",[178,286,181],{"href":180}," \u002F ",[178,289,291],{"href":290},"\u002Fwiki\u002Ffine-tuning-vs-rag.html","Fine-tuning vs RAG",[129,293,294,295,299],{},"项目约定：",[178,296,298],{"href":297},"\u002Fwiki\u002Fagents-md.html","AGENTS.md","——别把它写太长，否则也是一种 Context Rot",[129,301,302,303,287,305],{},"相关模型：",[178,304,235],{"href":234},[178,306,308],{"href":307},"\u002Fmodels\u002Fgemini-3-pro.html","Gemini 3 Pro",{"title":310,"searchDepth":311,"depth":311,"links":312},"",3,[313,315,316,317,318,319,320],{"id":18,"depth":314,"text":19},2,{"id":100,"depth":314,"text":100},{"id":121,"depth":314,"text":121},{"id":158,"depth":314,"text":158},{"id":211,"depth":314,"text":211},{"id":244,"depth":314,"text":244},{"id":273,"depth":314,"text":273},"concept","md",{},true,"\u002Fwiki\u002Fcontext-rot","2026-06-28",[328,329],"claude-opus-4-5","gemini-3-pro",[331,332],"coding\u002Fcli\u002Fclaude-code","coding\u002Fapi\u002Fopenrouter",{"title":11,"description":310},"context-rot","wiki\u002Fcontext-rot","随着输入 token 增多，LLM 输出质量会持续下降——即便远没到上下文窗口上限。Chroma 实测 18 个前沿模型无一幸免。对编码 agent 而言，这是首要失败模式，比模型能力更关键。",[57,338,339,340,341],"长上下文","上下文工程","LLM","Agent","uXMG-NQvZBw7YcNrS4wCInclUQ496ZFEkWC3zXogaxo",1782663749263]