[{"data":1,"prerenderedAt":10740},["ShallowReactive",2],{"header-counts":3,"wiki-list":6,"footer-counts":10739},{"tools":4,"reviews":5},65,7,[7,797,1515,2071,2841,3456,4438,5062,6113,6981,7722,8425,9187,10075],{"id":8,"title":9,"body":10,"category":779,"description":108,"extension":780,"meta":781,"navigation":310,"path":782,"published":783,"relatedModels":784,"relatedTools":785,"seo":789,"slug":790,"stem":791,"summary":792,"tags":793,"updated":783,"__hash__":796},"wiki\u002Fwiki\u002Fa2a.md","A2A (Agent-to-Agent Protocol)",{"type":11,"value":12,"toc":763},"minimark",[13,18,27,36,39,42,55,58,85,91,94,99,102,201,212,215,223,226,229,341,352,356,443,446,450,453,523,530,533,536,586,589,674,678,707,710,713,724,731,734,759],[14,15,17],"h2",{"id":16},"什么是-a2a","什么是 A2A",[19,20,21,22,26],"p",{},"A2A（Agent-to-Agent Protocol）是 Google 于 2025 年提出的开放协议，解决",[23,24,25],"strong",{},"不同 Agent 之间如何通信和协作","的问题。",[19,28,29,30,35],{},"如果说 ",[31,32,34],"a",{"href":33},"\u002Fwiki\u002Fmcp.html","MCP"," 解决的是\"Agent 如何连接工具\"，那 A2A 解决的是\"Agent 如何连接其他 Agent\"。",[14,37,38],{"id":38},"解决什么问题",[19,40,41],{},"当前 Agent 生态是孤岛：",[43,44,45,49,52],"ul",{},[46,47,48],"li",{},"Coze 上的 Agent 不能调用 Dify 上的 Agent",[46,50,51],{},"Claude Code 不能委派任务给 Manus",[46,53,54],{},"企业自建 Agent 不能与外部 Agent 协作",[19,56,57],{},"A2A 提供标准化的通信协议，让任何 Agent 都能：",[59,60,61,67,73,79],"ol",{},[46,62,63,66],{},[23,64,65],{},"发现"," — 找到其他 Agent 的能力",[46,68,69,72],{},[23,70,71],{},"协商"," — 确认对方能否完成任务",[46,74,75,78],{},[23,76,77],{},"委派"," — 把子任务交给最合适的 Agent",[46,80,81,84],{},[23,82,83],{},"回调"," — 接收其他 Agent 的结果",[86,87,88],"blockquote",{},[19,89,90],{},"这与 SOA \u002F 微服务时代解决\"服务怎么互相找到对方\"是同一类问题。A2A 借鉴了 OpenAPI（自描述）和 OAuth（鉴权）的成熟模式，但把对象从\"服务\"换成\"Agent\"，多出了「长任务」「流式回包」「人在回路」三个 Agent 特有需求。",[14,92,93],{"id":93},"工作原理",[95,96,98],"h3",{"id":97},"agent-card","Agent Card",[19,100,101],{},"每个 Agent 暴露一个 Agent Card（类似名片），描述自己的能力：",[103,104,109],"pre",{"className":105,"code":106,"language":107,"meta":108,"style":108},"language-json shiki shiki-themes github-light github-dark","{\n  \"name\": \"code-review-agent\",\n  \"description\": \"Reviews code for bugs, security, and style\",\n  \"capabilities\": [\"code-review\", \"security-scan\"],\n  \"endpoint\": \"https:\u002F\u002Fexample.com\u002Fa2a\u002Fagent\",\n  \"auth\": \"bearer-token\"\n}\n","json","",[110,111,112,121,138,151,172,185,196],"code",{"__ignoreMap":108},[113,114,117],"span",{"class":115,"line":116},"line",1,[113,118,120],{"class":119},"sVt8B","{\n",[113,122,124,128,131,135],{"class":115,"line":123},2,[113,125,127],{"class":126},"sj4cs","  \"name\"",[113,129,130],{"class":119},": ",[113,132,134],{"class":133},"sZZnC","\"code-review-agent\"",[113,136,137],{"class":119},",\n",[113,139,141,144,146,149],{"class":115,"line":140},3,[113,142,143],{"class":126},"  \"description\"",[113,145,130],{"class":119},[113,147,148],{"class":133},"\"Reviews code for bugs, security, and style\"",[113,150,137],{"class":119},[113,152,154,157,160,163,166,169],{"class":115,"line":153},4,[113,155,156],{"class":126},"  \"capabilities\"",[113,158,159],{"class":119},": [",[113,161,162],{"class":133},"\"code-review\"",[113,164,165],{"class":119},", ",[113,167,168],{"class":133},"\"security-scan\"",[113,170,171],{"class":119},"],\n",[113,173,175,178,180,183],{"class":115,"line":174},5,[113,176,177],{"class":126},"  \"endpoint\"",[113,179,130],{"class":119},[113,181,182],{"class":133},"\"https:\u002F\u002Fexample.com\u002Fa2a\u002Fagent\"",[113,184,137],{"class":119},[113,186,188,191,193],{"class":115,"line":187},6,[113,189,190],{"class":126},"  \"auth\"",[113,192,130],{"class":119},[113,194,195],{"class":133},"\"bearer-token\"\n",[113,197,198],{"class":115,"line":5},[113,199,200],{"class":119},"}\n",[19,202,203,204,207,208,211],{},"Agent Card 一般挂在固定路径（如 ",[110,205,206],{},"\u002F.well-known\u002Fagent.json","），客户端通过 HTTP GET 拉取——和 OpenAPI 的 ",[110,209,210],{},"swagger.json"," 一个套路。",[95,213,214],{"id":214},"任务流程",[103,216,221],{"className":217,"code":219,"language":220},[218],"language-text","Agent A: \"我需要代码审查\"\n  ↓\n发现 Agent B 的 Agent Card → B 能做代码审查\n  ↓\nA 向 B 发送任务请求（附带代码）\n  ↓\nB 接受任务，开始审查\n  ↓\nB 返回审查结果给 A\n  ↓\nA 整合结果，继续主任务\n","text",[110,222,219],{"__ignoreMap":108},[95,224,225],{"id":225},"一次请求长什么样",[19,227,228],{},"A2A 在 HTTP 上跑 JSON-RPC 风格的方法，常见三个：",[103,230,234],{"className":231,"code":232,"language":233,"meta":108,"style":108},"language-jsonc shiki shiki-themes github-light github-dark","\u002F\u002F 1. 发起任务（异步，立刻返回 taskId）\nPOST https:\u002F\u002Fexample.com\u002Fa2a\n{\n  \"jsonrpc\": \"2.0\", \"id\": 1,\n  \"method\": \"tasks\u002Fsend\",\n  \"params\": {\n    \"id\": \"task-001\",\n    \"message\": {\n      \"role\": \"user\",\n      \"parts\": [{ \"type\": \"text\", \"text\": \"Review this diff: ...\" }]\n    }\n  }\n}\n\n\u002F\u002F 2. 查询任务状态（轮询或 SSE 订阅）\n{ \"method\": \"tasks\u002Fget\", \"params\": { \"id\": \"task-001\" } }\n\n\u002F\u002F 3. 取消任务\n{ \"method\": \"tasks\u002Fcancel\", \"params\": { \"id\": \"task-001\" } }\n","jsonc",[110,235,236,241,246,250,255,260,265,270,276,282,288,294,300,305,312,318,324,329,335],{"__ignoreMap":108},[113,237,238],{"class":115,"line":116},[113,239,240],{},"\u002F\u002F 1. 发起任务（异步，立刻返回 taskId）\n",[113,242,243],{"class":115,"line":123},[113,244,245],{},"POST https:\u002F\u002Fexample.com\u002Fa2a\n",[113,247,248],{"class":115,"line":140},[113,249,120],{},[113,251,252],{"class":115,"line":153},[113,253,254],{},"  \"jsonrpc\": \"2.0\", \"id\": 1,\n",[113,256,257],{"class":115,"line":174},[113,258,259],{},"  \"method\": \"tasks\u002Fsend\",\n",[113,261,262],{"class":115,"line":187},[113,263,264],{},"  \"params\": {\n",[113,266,267],{"class":115,"line":5},[113,268,269],{},"    \"id\": \"task-001\",\n",[113,271,273],{"class":115,"line":272},8,[113,274,275],{},"    \"message\": {\n",[113,277,279],{"class":115,"line":278},9,[113,280,281],{},"      \"role\": \"user\",\n",[113,283,285],{"class":115,"line":284},10,[113,286,287],{},"      \"parts\": [{ \"type\": \"text\", \"text\": \"Review this diff: ...\" }]\n",[113,289,291],{"class":115,"line":290},11,[113,292,293],{},"    }\n",[113,295,297],{"class":115,"line":296},12,[113,298,299],{},"  }\n",[113,301,303],{"class":115,"line":302},13,[113,304,200],{},[113,306,308],{"class":115,"line":307},14,[113,309,311],{"emptyLinePlaceholder":310},true,"\n",[113,313,315],{"class":115,"line":314},15,[113,316,317],{},"\u002F\u002F 2. 查询任务状态（轮询或 SSE 订阅）\n",[113,319,321],{"class":115,"line":320},16,[113,322,323],{},"{ \"method\": \"tasks\u002Fget\", \"params\": { \"id\": \"task-001\" } }\n",[113,325,327],{"class":115,"line":326},17,[113,328,311],{"emptyLinePlaceholder":310},[113,330,332],{"class":115,"line":331},18,[113,333,334],{},"\u002F\u002F 3. 取消任务\n",[113,336,338],{"class":115,"line":337},19,[113,339,340],{},"{ \"method\": \"tasks\u002Fcancel\", \"params\": { \"id\": \"task-001\" } }\n",[19,342,343,344,347,348,351],{},"任务状态机：",[110,345,346],{},"submitted → working → input-required → completed \u002F failed \u002F canceled","。",[110,349,350],{},"input-required"," 是 A2A 比传统 RPC 多出来的一档——长任务跑到一半需要补输入时，可以挂起等回包。",[14,353,355],{"id":354},"a2a-vs-mcp","A2A vs MCP",[357,358,359,374],"table",{},[360,361,362],"thead",{},[363,364,365,369,371],"tr",{},[366,367,368],"th",{},"维度",[366,370,34],{},[366,372,373],{},"A2A",[375,376,377,389,400,411,422,433],"tbody",{},[363,378,379,383,386],{},[380,381,382],"td",{},"连接对象",[380,384,385],{},"Agent ↔ 工具",[380,387,388],{},"Agent ↔ Agent",[363,390,391,394,397],{},[380,392,393],{},"提出者",[380,395,396],{},"Anthropic",[380,398,399],{},"Google",[363,401,402,405,408],{},[380,403,404],{},"通信模式",[380,406,407],{},"同步调用",[380,409,410],{},"异步任务 + 流式回包",[363,412,413,416,419],{},[380,414,415],{},"状态",[380,417,418],{},"单次往返为主",[380,420,421],{},"长任务有完整生命周期",[363,423,424,427,430],{},[380,425,426],{},"适用场景",[380,428,429],{},"工具调用",[380,431,432],{},"任务委派",[363,434,435,438,441],{},[380,436,437],{},"关系",[380,439,440],{},"互补",[380,442,440],{},[19,444,445],{},"两者可以组合使用：Agent A 通过 A2A 委派任务给 Agent B，Agent B 通过 MCP 调用工具完成任务。",[14,447,449],{"id":448},"a2a-vs-webhook-普通-rest","A2A vs Webhook \u002F 普通 REST",[19,451,452],{},"新人常问\"A2A 和我自己写个 Webhook 调对方 API 有啥不一样\"——主要差异在三处：",[357,454,455,466],{},[360,456,457],{},[363,458,459,461,464],{},[366,460,368],{},[366,462,463],{},"自写 REST\u002FWebhook",[366,465,373],{},[375,467,468,479,490,501,512],{},[363,469,470,473,476],{},[380,471,472],{},"能力发现",[380,474,475],{},"自己读对方文档",[380,477,478],{},"Agent Card 标准化",[363,480,481,484,487],{},[380,482,483],{},"长任务支持",[380,485,486],{},"自己实现轮询\u002F回调",[380,488,489],{},"协议内置任务状态机",[363,491,492,495,498],{},[380,493,494],{},"多 Agent 编排",[380,496,497],{},"每对 Agent 单独适配",[380,499,500],{},"一套协议接入所有 Agent",[363,502,503,506,509],{},[380,504,505],{},"鉴权",[380,507,508],{},"各家一套",[380,510,511],{},"标准 OAuth2 \u002F Bearer",[363,513,514,517,520],{},[380,515,516],{},"流式中间态",[380,518,519],{},"自己定 SSE 格式",[380,521,522],{},"协议规定 message parts 流式回包",[19,524,525,526,529],{},"简言之：能用 REST 解决的双方对接，A2A 并不省事；",[23,527,528],{},"它的价值在 N×M 互联","——一个 Agent 同时要调多家 Agent 服务时。",[14,531,532],{"id":532},"五分钟接入清单",[19,534,535],{},"第一次让你的 Agent 接受 A2A 调用，建议这个顺序：",[59,537,538,544,553,570,576],{},[46,539,540,543],{},[23,541,542],{},"挑一个能力点先暴露","——不要一上来就 mapping 所有内部函数，先选一个无副作用、文档清楚的（比如「查某 ID 的状态」）。",[46,545,546,549,550,552],{},[23,547,548],{},"写 Agent Card","——name \u002F description \u002F capabilities \u002F endpoint \u002F auth 五个字段先填上，跑个 GET ",[110,551,206],{}," 能拿到 200 就算第一关过。",[46,554,555,558,559,562,563,562,566,569],{},[23,556,557],{},"实现最小三方法","——",[110,560,561],{},"tasks\u002Fsend"," \u002F ",[110,564,565],{},"tasks\u002Fget",[110,567,568],{},"tasks\u002Fcancel","。返回先用 mock 数据，验证客户端能正确解析。",[46,571,572,575],{},[23,573,574],{},"接真实业务","——把 mock 换成真实逻辑，注意区分同步立返和异步长任务两种返回。",[46,577,578,581,582,585],{},[23,579,580],{},"加 Auth","——Bearer Token 起步，生产环境上 OAuth2。",[23,583,584],{},"不要","在没鉴权前把 Agent Card 暴露到公网。",[14,587,588],{"id":588},"常见踩坑",[43,590,591,612,624,644,657],{},[46,592,593,596,597,600,601,604,605,600,608,611],{},[23,594,595],{},"Agent Card 字段写\"我什么都能干\"","——客户端会按 capabilities 路由，写得太宽会被发各种意外任务。capabilities 写具体动词（",[110,598,599],{},"code-review","、",[110,602,603],{},"generate-image","），不要写形容词（",[110,606,607],{},"smart",[110,609,610],{},"powerful","）。",[46,613,614,619,620,623],{},[23,615,616,617],{},"同步阻塞 ",[110,618,561],{},"——任务超过几秒就应该立即返回 ",[110,621,622],{},"submitted"," 让对方轮询\u002F订阅；阻塞返回会让客户端的 timeout 各种炸。",[46,625,626,629,630,633,634,637,638,633,641,643],{},[23,627,628],{},"状态机偷懒","——只用 ",[110,631,632],{},"completed"," 和 ",[110,635,636],{},"failed","，跳过 ",[110,639,640],{},"working",[110,642,350],{},"，客户端就没法显示进度，也没法做人在回路。",[46,645,646,649,650,653,654,656],{},[23,647,648],{},"没做幂等","——同一个 ",[110,651,652],{},"taskId"," 被重发（网络重试），如果你的实现会再跑一次，副作用就会重复执行。",[110,655,561],{}," 必须按 ID 去重。",[46,658,659,662,663,666,667,670,671,347],{},[23,660,661],{},"Agent Card 缓存太久","——能力升级了但客户端拿到的还是老 card。带 ",[110,664,665],{},"ETag"," 和合理的 ",[110,668,669],{},"Cache-Control","，或者在 Card 里放 ",[110,672,673],{},"version",[14,675,677],{"id":676},"什么场景不该用-a2a","什么场景不该用 A2A",[43,679,680,686,692,698],{},[46,681,682,685],{},[23,683,684],{},"只有两个 Agent 长期一对一调用","——直接 REST 调用更轻，A2A 是为 N×M 准备的。",[46,687,688,691],{},[23,689,690],{},"延迟敏感（\u003C 100ms）的内部调用","——A2A 那套任务状态机 + 状态轮询有协议开销，进程内函数调用更合适。",[46,693,694,697],{},[23,695,696],{},"强一致事务","——A2A 是异步任务模型，不保证 ACID。账户转账这种场景应该用传统 RPC + 分布式事务。",[46,699,700,703,704,706],{},[23,701,702],{},"纯工具调用而非 Agent 协作","——用 ",[31,705,34],{"href":33},"，工具调用走 A2A 是杀鸡用牛刀。",[14,708,709],{"id":709},"生态现状",[19,711,712],{},"A2A 协议较新（2025 年提出），目前支持的平台：",[43,714,715,718,721],{},[46,716,717],{},"Google AI Studio \u002F Vertex AI",[46,719,720],{},"部分 LangChain Agent",[46,722,723],{},"尚未被 Coze \u002F Dify 等主流平台广泛支持",[19,725,726,727,730],{},"未来如果 A2A 普及，将出现\"Agent 市场\"——不同平台上的 Agent 可以自由组合协作。当下阶段（2026 中），",[23,728,729],{},"生产引入需谨慎","：协议本身在迭代，客户端 SDK 成熟度有限，先在内部 PoC 跑通了再考虑跨组织对接。",[14,732,733],{"id":733},"延伸阅读",[43,735,736,743,751],{},[46,737,738,739,742],{},"工具协议：",[31,740,741],{"href":33},"MCP（Model Context Protocol）","——Agent ↔ 工具的标准",[46,744,745,746,750],{},"基础概念：",[31,747,749],{"href":748},"\u002Fwiki\u002Fai-agent.html","AI Agent","——什么是 Agent、有哪些分类",[46,752,753,754,758],{},"工具调用：",[31,755,757],{"href":756},"\u002Fwiki\u002Ffunction-calling.html","Function Calling","——Agent 调外部函数的底层机制",[760,761,762],"style",{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}",{"title":108,"searchDepth":140,"depth":140,"links":764},[765,766,767,772,773,774,775,776,777,778],{"id":16,"depth":123,"text":17},{"id":38,"depth":123,"text":38},{"id":93,"depth":123,"text":93,"children":768},[769,770,771],{"id":97,"depth":140,"text":98},{"id":214,"depth":140,"text":214},{"id":225,"depth":140,"text":225},{"id":354,"depth":123,"text":355},{"id":448,"depth":123,"text":449},{"id":532,"depth":123,"text":532},{"id":588,"depth":123,"text":588},{"id":676,"depth":123,"text":677},{"id":709,"depth":123,"text":709},{"id":733,"depth":123,"text":733},"protocol","md",{},"\u002Fwiki\u002Fa2a","2026-06-21",null,[786,787,788],"agent\u002Fplatform\u002Fcoze","agent\u002Fplatform\u002Fdify","agent\u002Fplatform\u002Flangflow",{"title":9,"description":108},"a2a","wiki\u002Fa2a","Google 提出的 Agent 间通信协议，让不同平台、不同框架的 AI Agent 能互相发现、协商、协作。",[373,794,795,399],"协议","Agent","La501G--sQWT60PYA--KHvN_CMg7oGI21WfbhHHLvtU",{"id":798,"title":799,"body":800,"category":1498,"description":108,"extension":780,"meta":1499,"navigation":310,"path":1500,"published":783,"relatedModels":1501,"relatedTools":1504,"seo":1508,"slug":1509,"stem":1510,"summary":1511,"tags":1512,"updated":783,"__hash__":1514},"wiki\u002Fwiki\u002Fai-agent.md","AI Agent（智能体）",{"type":11,"value":801,"toc":1474},[802,806,809,841,847,851,857,861,864,870,874,877,883,887,890,894,898,907,924,928,948,952,972,976,979,1082,1085,1089,1093,1150,1153,1197,1201,1271,1274,1277,1309,1313,1359,1366,1369,1401,1404,1448,1450],[14,803,805],{"id":804},"什么是-ai-agent","什么是 AI Agent",[19,807,808],{},"AI Agent（智能体）是一种 AI 系统，它能：",[59,810,811,817,823,829,835],{},[46,812,813,816],{},[23,814,815],{},"感知"," — 理解用户意图和当前环境",[46,818,819,822],{},[23,820,821],{},"规划"," — 把复杂目标拆解为可执行的步骤",[46,824,825,828],{},[23,826,827],{},"行动"," — 调用工具、API、代码来完成每一步",[46,830,831,834],{},[23,832,833],{},"观察"," — 获取行动结果，判断是否成功",[46,836,837,840],{},[23,838,839],{},"迭代"," — 根据结果调整计划，持续直到完成",[19,842,843,844,347],{},"与普通聊天机器人的核心区别：",[23,845,846],{},"Agent 能自主决策和执行，不只是对话",[14,848,850],{"id":849},"agent-架构","Agent 架构",[103,852,855],{"className":853,"code":854,"language":220},[218],"用户目标\n  ↓\n┌─────────────────────────────────┐\n│         Agent 循环              │\n│                                 │\n│  ① 规划：下一步做什么？         │\n│  ② 行动：调用工具\u002F代码\u002FAPI      │\n│  ③ 观察：解析返回结果           │\n│  ④ 反思：成功了吗？需要调整？   │\n│  ⑤ 循环或结束                   │\n│                                 │\n└─────────────────────────────────┘\n  ↓\n任务完成\n",[110,856,854],{"__ignoreMap":108},[95,858,860],{"id":859},"react-模式","ReAct 模式",[19,862,863],{},"最经典的 Agent 模式：Reasoning + Acting 交替进行。",[103,865,868],{"className":866,"code":867,"language":220},[218],"Thought: 用户要查上海的天气，我需要调用天气 API\nAction: call_weather_api(city=\"上海\")\nObservation: 上海今天 28°C，多云\nThought: 拿到天气数据了，可以回答用户了\nAnswer: 上海今天 28°C，多云，适合出行。\n",[110,869,867],{"__ignoreMap":108},[95,871,873],{"id":872},"plan-and-execute-模式","Plan-and-Execute 模式",[19,875,876],{},"先全局规划再逐步执行：",[103,878,881],{"className":879,"code":880,"language":220},[218],"Plan:\n  1. 搜索竞品信息\n  2. 提取价格数据\n  3. 生成对比表格\n  4. 写分析总结\n\nExecute step 1...\nExecute step 2...\n",[110,882,880],{"__ignoreMap":108},[95,884,886],{"id":885},"reflexion-self-correct-模式","Reflexion \u002F Self-Correct 模式",[19,888,889],{},"执行完一轮后让模型自己评审输出，发现问题再跑一次。代价是 token 翻倍，但能显著提升复杂任务的成功率。",[14,891,893],{"id":892},"agent-的关键能力","Agent 的关键能力",[95,895,897],{"id":896},"工具调用tool-use","工具调用（Tool Use）",[19,899,900,901,562,904,906],{},"Agent 通过 ",[31,902,903],{"href":756},"function calling",[31,905,34],{"href":33}," 调用外部工具：",[43,908,909,912,915,918,921],{},[46,910,911],{},"搜索引擎（Google \u002F Bing API）",[46,913,914],{},"代码执行（沙箱 Python \u002F Node.js）",[46,916,917],{},"文件操作（读写本地文件）",[46,919,920],{},"API 调用（HTTP 请求）",[46,922,923],{},"浏览器自动化（Puppeteer \u002F Playwright）",[95,925,927],{"id":926},"记忆memory","记忆（Memory）",[43,929,930,936,942],{},[46,931,932,935],{},[23,933,934],{},"短期记忆"," — 当前对话上下文（在 context window 内）",[46,937,938,941],{},[23,939,940],{},"长期记忆"," — 跨会话的知识存储（向量数据库）",[46,943,944,947],{},[23,945,946],{},"工作记忆"," — 当前任务的中间状态（变量、文件、中间结果）",[95,949,951],{"id":950},"规划planning","规划（Planning）",[43,953,954,960,966],{},[46,955,956,959],{},[23,957,958],{},"任务分解"," — 把\"做一个网站\"拆成\"设计→前端→后端→部署\"",[46,961,962,965],{},[23,963,964],{},"自我反思"," — 执行失败后分析原因，调整策略",[46,967,968,971],{},[23,969,970],{},"动态重规划"," — 发现原计划不可行时及时调整",[14,973,975],{"id":974},"agent-vs-copilot-vs-chatbot-vs-rpa","Agent vs Copilot vs Chatbot vs RPA",[19,977,978],{},"经常混淆的四个东西，一张表区分：",[357,980,981,998],{},[360,982,983],{},[363,984,985,987,990,993,995],{},[366,986,368],{},[366,988,989],{},"Chatbot",[366,991,992],{},"Copilot",[366,994,795],{},[366,996,997],{},"RPA",[375,999,1000,1017,1034,1050,1065],{},[363,1001,1002,1005,1008,1011,1014],{},[380,1003,1004],{},"主动性",[380,1006,1007],{},"被动答",[380,1009,1010],{},"辅助建议",[380,1012,1013],{},"自主执行",[380,1015,1016],{},"按固定流程跑",[363,1018,1019,1022,1025,1028,1031],{},[380,1020,1021],{},"决策",[380,1023,1024],{},"无",[380,1026,1027],{},"用户决策",[380,1029,1030],{},"Agent 决策",[380,1032,1033],{},"规则决策",[363,1035,1036,1039,1041,1044,1047],{},[380,1037,1038],{},"工具",[380,1040,1024],{},[380,1042,1043],{},"少量",[380,1045,1046],{},"任意",[380,1048,1049],{},"固定脚本",[363,1051,1052,1055,1057,1059,1062],{},[380,1053,1054],{},"适应变化",[380,1056,1024],{},[380,1058,1024],{},[380,1060,1061],{},"强",[380,1063,1064],{},"弱（页面变就崩）",[363,1066,1067,1070,1073,1076,1079],{},[380,1068,1069],{},"例子",[380,1071,1072],{},"早期客服 bot",[380,1074,1075],{},"GitHub Copilot",[380,1077,1078],{},"Devin \u002F Manus",[380,1080,1081],{},"UiPath \u002F 影刀",[19,1083,1084],{},"Agent 是 RPA 的下一代——RPA 写死流程，Agent 看懂目标自己想办法。",[14,1086,1088],{"id":1087},"agent-分类","Agent 分类",[95,1090,1092],{"id":1091},"按-autonomy-程度分","按 autonomy 程度分",[357,1094,1095,1107],{},[360,1096,1097],{},[363,1098,1099,1102,1104],{},[366,1100,1101],{},"类型",[366,1103,1069],{},[366,1105,1106],{},"自主程度",[375,1108,1109,1118,1132],{},[363,1110,1111,1113,1115],{},[380,1112,992],{},[380,1114,1075],{},[380,1116,1117],{},"低：辅助人类，不自主",[363,1119,1120,1123,1129],{},[380,1121,1122],{},"Tool-use Agent",[380,1124,1125],{},[31,1126,1128],{"href":1127},"\u002Fcoding\u002Fcli\u002Fclaude-code.html","Claude Code",[380,1130,1131],{},"中：人类给目标，Agent 执行",[363,1133,1134,1137,1147],{},[380,1135,1136],{},"Autonomous Agent",[380,1138,1139,562,1143],{},[31,1140,1142],{"href":1141},"\u002Fcoding\u002Fagent\u002Fdevin.html","Devin",[31,1144,1146],{"href":1145},"\u002Fagent\u002Fgeneral\u002Fmanus.html","Manus",[380,1148,1149],{},"高：给目标，Agent 全程自主",[95,1151,1152],{"id":1152},"按场景分",[357,1154,1155,1163],{},[360,1156,1157],{},[363,1158,1159,1161],{},[366,1160,1101],{},[366,1162,1069],{},[375,1164,1165,1173,1181,1189],{},[363,1166,1167,1170],{},[380,1168,1169],{},"编程 Agent",[380,1171,1172],{},"Devin \u002F Cursor Composer \u002F Claude Code",[363,1174,1175,1178],{},[380,1176,1177],{},"通用 Agent",[380,1179,1180],{},"Manus \u002F OpenManus",[363,1182,1183,1186],{},[380,1184,1185],{},"桌面 Agent",[380,1187,1188],{},"OpenClaw \u002F AutoGLM",[363,1190,1191,1194],{},[380,1192,1193],{},"Agent 平台",[380,1195,1196],{},"Coze \u002F Dify \u002F FastGPT",[14,1198,1200],{"id":1199},"agent-平台对比","Agent 平台对比",[357,1202,1203,1216],{},[360,1204,1205],{},[363,1206,1207,1210,1213],{},[366,1208,1209],{},"平台",[366,1211,1212],{},"定位",[366,1214,1215],{},"特点",[375,1217,1218,1229,1240,1250,1261],{},[363,1219,1220,1223,1226],{},[380,1221,1222],{},"Coze（扣子）",[380,1224,1225],{},"字节出品",[380,1227,1228],{},"低代码、模板丰富、国内直连",[363,1230,1231,1234,1237],{},[380,1232,1233],{},"Dify",[380,1235,1236],{},"开源",[380,1238,1239],{},"可私有化部署、RAG 内置",[363,1241,1242,1245,1247],{},[380,1243,1244],{},"FastGPT",[380,1246,1236],{},[380,1248,1249],{},"知识库优先、RAG 最强",[363,1251,1252,1255,1258],{},[380,1253,1254],{},"元器",[380,1256,1257],{},"百度出品",[380,1259,1260],{},"接入百度生态",[363,1262,1263,1266,1268],{},[380,1264,1265],{},"n8n",[380,1267,1236],{},[380,1269,1270],{},"工作流自动化、可视化编排",[14,1272,1273],{"id":1273},"典型失败模式",[19,1275,1276],{},"知道 Agent 怎么失败，才知道怎么防。常见五种：",[59,1278,1279,1285,1291,1297,1303],{},[46,1280,1281,1284],{},[23,1282,1283],{},"死循环"," — 模型一直觉得\"再试一次就成\"，反复调同一个失败的工具。防御：硬性步数上限 + 重复检测。",[46,1286,1287,1290],{},[23,1288,1289],{},"跑偏目标"," — 多步任务中模型逐步把目标改成\"自己更想做的事\"。防御：每 N 步把原始目标重新塞回上下文。",[46,1292,1293,1296],{},[23,1294,1295],{},"过度自信的错误结果"," — 工具返回了错误数据但模型不质疑，照单全收输出给用户。防御：critical 工具加 verifier + 让模型显式标注信心。",[46,1298,1299,1302],{},[23,1300,1301],{},"副作用爆炸"," — 比如让 Agent 整理邮件，结果它\"自作主张\"开始删邮件。防御：默认 read-only，写操作单独 review。",[46,1304,1305,1308],{},[23,1306,1307],{},"上下文窗口耗尽"," — 长任务下 working memory 越来越大，最后超限。防御：定期总结压缩历史、把工具输出截断。",[14,1310,1312],{"id":1311},"自建-vs-用平台的决策","自建 vs 用平台的决策",[357,1314,1315,1325],{},[360,1316,1317],{},[363,1318,1319,1322],{},[366,1320,1321],{},"选择",[366,1323,1324],{},"适合场景",[375,1326,1327,1335,1343,1351],{},[363,1328,1329,1332],{},[380,1330,1331],{},"直接用 Coze \u002F Dify \u002F FastGPT",[380,1333,1334],{},"业务逻辑标准、内部使用、想快速看效果",[363,1336,1337,1340],{},[380,1338,1339],{},"LangChain \u002F LlamaIndex 框架",[380,1341,1342],{},"需要自定义工作流但不想从零写",[363,1344,1345,1348],{},[380,1346,1347],{},"自己写（基于 OpenAI SDK \u002F Anthropic SDK）",[380,1349,1350],{},"性能\u002F延迟敏感、有特殊业务约束、不想被框架绑死",[363,1352,1353,1356],{},[380,1354,1355],{},"Claude Code \u002F Cursor 这类 Coding Agent",[380,1357,1358],{},"目标本身就是写代码",[19,1360,1361,1362,1365],{},"经验法则：",[23,1363,1364],{},"先用平台跑 PoC，跑通了再考虑自建","。从零写一个生产级 Agent 框架的工作量被严重低估。",[14,1367,1368],{"id":1368},"当前局限",[59,1370,1371,1377,1383,1389,1395],{},[46,1372,1373,1376],{},[23,1374,1375],{},"可靠性"," — 多步 Agent 的错误率随步骤数指数增长（10 步 × 90% = 35% 成功率）",[46,1378,1379,1382],{},[23,1380,1381],{},"成本"," — 每步都要调用 LLM，复杂任务可能花费数美元",[46,1384,1385,1388],{},[23,1386,1387],{},"速度"," — LLM 推理延迟 × 步骤数 = 等待时间长",[46,1390,1391,1394],{},[23,1392,1393],{},"安全"," — Agent 能执行真实操作，出错后果严重（删文件、发邮件）",[46,1396,1397,1400],{},[23,1398,1399],{},"上下文窗口"," — 长任务可能超出 context window",[14,1402,1403],{"id":1403},"最佳实践",[59,1405,1406,1412,1418,1424,1430,1436,1442],{},[46,1407,1408,1411],{},[23,1409,1410],{},"人在回路（Human-in-the-loop）"," — 关键步骤让人类确认",[46,1413,1414,1417],{},[23,1415,1416],{},"沙箱执行"," — 代码在隔离环境运行，避免破坏系统",[46,1419,1420,1423],{},[23,1421,1422],{},"限制工具范围"," — 只给 Agent 必要的工具权限",[46,1425,1426,1429],{},[23,1427,1428],{},"检查点机制"," — 每步保存状态，失败可回滚",[46,1431,1432,1435],{},[23,1433,1434],{},"超时与重试"," — 防止 Agent 陷入无限循环",[46,1437,1438,1441],{},[23,1439,1440],{},"可观察性"," — 把每一步的 thought \u002F action \u002F observation 持久化，出问题能复盘",[46,1443,1444,1447],{},[23,1445,1446],{},"预算硬上限"," — token \u002F 步数 \u002F 钱都要有硬上限，到顶停止而不是无限重试",[14,1449,733],{"id":733},[43,1451,1452,1462,1467],{},[46,1453,1454,1455,1457,1458,1461],{},"协议层：",[31,1456,34],{"href":33},"（Agent ↔ 工具）\u002F ",[31,1459,373],{"href":1460},"\u002Fwiki\u002Fa2a.html","（Agent ↔ Agent）",[46,1463,1464,1465],{},"工具调用底层：",[31,1466,757],{"href":756},[46,1468,1469,1470],{},"上下文管理：",[31,1471,1473],{"href":1472},"\u002Fwiki\u002Fcontext-engineering.html","Context Engineering",{"title":108,"searchDepth":140,"depth":140,"links":1475},[1476,1477,1482,1487,1488,1492,1493,1494,1495,1496,1497],{"id":804,"depth":123,"text":805},{"id":849,"depth":123,"text":850,"children":1478},[1479,1480,1481],{"id":859,"depth":140,"text":860},{"id":872,"depth":140,"text":873},{"id":885,"depth":140,"text":886},{"id":892,"depth":123,"text":893,"children":1483},[1484,1485,1486],{"id":896,"depth":140,"text":897},{"id":926,"depth":140,"text":927},{"id":950,"depth":140,"text":951},{"id":974,"depth":123,"text":975},{"id":1087,"depth":123,"text":1088,"children":1489},[1490,1491],{"id":1091,"depth":140,"text":1092},{"id":1152,"depth":140,"text":1152},{"id":1199,"depth":123,"text":1200},{"id":1273,"depth":123,"text":1273},{"id":1311,"depth":123,"text":1312},{"id":1368,"depth":123,"text":1368},{"id":1403,"depth":123,"text":1403},{"id":733,"depth":123,"text":733},"concept",{},"\u002Fwiki\u002Fai-agent",[1502,1503],"claude-sonnet-4","gpt-5",[786,787,1505,1506,1507],"agent\u002Fgeneral\u002Fmanus","coding\u002Fagent\u002Fdevin","coding\u002Fcli\u002Fclaude-code",{"title":799,"description":108},"ai-agent","wiki\u002Fai-agent","能自主感知环境、规划任务、调用工具、持续迭代的 AI 系统，从聊天机器人进化到能干活的数字员工。",[795,1513,429,821],"智能体","XSXWfdT7aUU7OnpfUKa72KYyecYTn6DpyGWpt9mYWEc",{"id":1516,"title":1517,"body":1518,"category":2057,"description":108,"extension":780,"meta":2058,"navigation":310,"path":2059,"published":783,"relatedModels":2060,"relatedTools":2061,"seo":2063,"slug":2064,"stem":2065,"summary":2066,"tags":2067,"updated":783,"__hash__":2070},"wiki\u002Fwiki\u002Fcontext-engineering.md","Context Engineering（上下文工程）",{"type":11,"value":1519,"toc":2038},[1520,1524,1531,1537,1540,1543,1549,1556,1559,1563,1566,1572,1576,1579,1585,1589,1592,1652,1659,1663,1666,1732,1738,1742,1748,1754,1757,1771,1774,1778,1781,1807,1811,1814,1835,1839,1842,1888,1891,1894,1920,1924,1994,1999,2001,2035],[14,1521,1523],{"id":1522},"什么是-context-engineering","什么是 Context Engineering",[19,1525,1526,1527,1530],{},"Context Engineering（上下文工程）是 Prompt Engineering 的进化版。不再只关注\"怎么写提示词\"，而是关注",[23,1528,1529],{},"怎么组装喂给模型的全部上下文","——系统提示、对话历史、检索结果、工具定义、few-shot 示例等。",[19,1532,1533,1534,347],{},"核心理念：",[23,1535,1536],{},"LLM 的能力上限不取决于模型本身，而取决于你给它什么上下文",[14,1538,1539],{"id":1539},"上下文窗口的组成",[19,1541,1542],{},"一个完整的 LLM 调用，上下文通常包含：",[103,1544,1547],{"className":1545,"code":1546,"language":220},[218],"┌─────────────────────────────────┐\n│ System Prompt（系统提示）        │  ← 角色设定、行为规范\n├─────────────────────────────────┤\n│ Tool Definitions（工具定义）     │  ← 可调用的函数签名\n├─────────────────────────────────┤\n│ Few-shot Examples（示例）        │  ← 输入输出范例\n├─────────────────────────────────┤\n│ Retrieved Context（检索内容）    │  ← RAG 检索的文档\n├─────────────────────────────────┤\n│ Conversation History（对话历史） │  ← 之前的对话\n├─────────────────────────────────┤\n│ Current Query（当前问题）        │  ← 用户本次输入\n└─────────────────────────────────┘\n",[110,1548,1546],{"__ignoreMap":108},[19,1550,1551,1552,1555],{},"顺序不是随便摆的。",[23,1553,1554],{},"稳定的、重复出现的内容放最前面","，让 prompt cache 命中率高；动态变化的（用户当前问题）放最后。这一条规则单独就能为高频应用省下 60-90% 的 input token 成本。",[14,1557,1558],{"id":1558},"核心原则",[95,1560,1562],{"id":1561},"_1-信号最大化","1. 信号最大化",[19,1564,1565],{},"上下文中每一条信息都应该对完成任务有贡献。删掉无关内容，让模型聚焦。",[103,1567,1570],{"className":1568,"code":1569,"language":220},[218],"❌ 把整个项目代码塞进上下文\n✅ 只塞当前修改的文件 + 相关接口定义\n",[110,1571,1569],{"__ignoreMap":108},[95,1573,1575],{"id":1574},"_2-噪音最小化","2. 噪音最小化",[19,1577,1578],{},"无关信息会分散模型注意力，降低输出质量。",[103,1580,1583],{"className":1581,"code":1582,"language":220},[218],"❌ \"你是一个 AI 助手，你有强大的能力，你的任务是...\"\n✅ \"Review this PR for security issues.\"  （直接给任务）\n",[110,1584,1582],{"__ignoreMap":108},[95,1586,1588],{"id":1587},"_3-结构化","3. 结构化",[19,1590,1591],{},"用清晰的格式让模型快速理解上下文结构：",[103,1593,1597],{"className":1594,"code":1595,"language":1596,"meta":108,"style":108},"language-xml shiki shiki-themes github-light github-dark","\u003Ccontext>\n  \u003Cfile path=\"src\u002Fauth.ts\">\n    ... file content ...\n  \u003C\u002Ffile>\n  \u003Cfile path=\"src\u002Fdb.ts\">\n    ... file content ...\n  \u003C\u002Ffile>\n\u003C\u002Fcontext>\n\u003Ctask>\n  Review the auth module for security vulnerabilities.\n\u003C\u002Ftask>\n","xml",[110,1598,1599,1604,1609,1614,1619,1624,1628,1632,1637,1642,1647],{"__ignoreMap":108},[113,1600,1601],{"class":115,"line":116},[113,1602,1603],{},"\u003Ccontext>\n",[113,1605,1606],{"class":115,"line":123},[113,1607,1608],{},"  \u003Cfile path=\"src\u002Fauth.ts\">\n",[113,1610,1611],{"class":115,"line":140},[113,1612,1613],{},"    ... file content ...\n",[113,1615,1616],{"class":115,"line":153},[113,1617,1618],{},"  \u003C\u002Ffile>\n",[113,1620,1621],{"class":115,"line":174},[113,1622,1623],{},"  \u003Cfile path=\"src\u002Fdb.ts\">\n",[113,1625,1626],{"class":115,"line":187},[113,1627,1613],{},[113,1629,1630],{"class":115,"line":5},[113,1631,1618],{},[113,1633,1634],{"class":115,"line":272},[113,1635,1636],{},"\u003C\u002Fcontext>\n",[113,1638,1639],{"class":115,"line":278},[113,1640,1641],{},"\u003Ctask>\n",[113,1643,1644],{"class":115,"line":284},[113,1645,1646],{},"  Review the auth module for security vulnerabilities.\n",[113,1648,1649],{"class":115,"line":290},[113,1650,1651],{},"\u003C\u002Ftask>\n",[19,1653,1654,1655,1658],{},"XML 和 Markdown 都行。Anthropic 的训练数据里 XML 标签出现频率更高，对 Claude 而言 XML 略胜；OpenAI 模型对 Markdown 表头敏感。在生产里",[23,1656,1657],{},"挑一种坚持用","，不要随机混搭。",[95,1660,1662],{"id":1661},"_4-上下文缓存","4. 上下文缓存",[19,1664,1665],{},"对于重复出现的上下文（系统提示、工具定义），使用 prompt caching 避免重复计算：",[357,1667,1668,1680],{},[360,1669,1670],{},[363,1671,1672,1674,1677],{},[366,1673,1209],{},[366,1675,1676],{},"机制",[366,1678,1679],{},"命中后价格",[375,1681,1682,1697,1708,1722],{},[363,1683,1684,1687,1694],{},[380,1685,1686],{},"Anthropic Prompt Caching",[380,1688,1689,1690,1693],{},"显式 ",[110,1691,1692],{},"cache_control"," 标记",[380,1695,1696],{},"Input 价格 -90%",[363,1698,1699,1702,1705],{},[380,1700,1701],{},"OpenAI Cached Input",[380,1703,1704],{},"自动缓存前缀（≥ 1024 token）",[380,1706,1707],{},"Input 价格 -50%",[363,1709,1710,1713,1719],{},[380,1711,1712],{},"Gemini Context Caching",[380,1714,1689,1715,1718],{},[110,1716,1717],{},"cachedContent"," API",[380,1720,1721],{},"Input 价格 -75%（按时间收存储费）",[363,1723,1724,1727,1730],{},[380,1725,1726],{},"DeepSeek Context Caching",[380,1728,1729],{},"自动缓存",[380,1731,1696],{},[19,1733,1734,1737],{},[23,1735,1736],{},"实测对比","：一个 50K token 的 system prompt，每次调用，开缓存与不开缓存的 input 成本差距 5-20 倍。高频调用的场景一定要开。",[14,1739,1741],{"id":1740},"中间遗忘lost-in-the-middle","中间遗忘（Lost in the Middle）",[19,1743,1744,1745,347],{},"上下文不是越长越好。论文与实测都显示模型对",[23,1746,1747],{},"首尾内容记得更牢，中间段容易被忽略",[103,1749,1752],{"className":1750,"code":1751,"language":220},[218],"[ system  | 检索结果1 | 检索结果2 | ... 检索结果10 | history | query ]\n   ↑ 强                ↑ 弱                    ↑ 强\n",[110,1753,1751],{"__ignoreMap":108},[19,1755,1756],{},"防御套路：",[43,1758,1759,1762,1765,1768],{},[46,1760,1761],{},"把关键信息放上下文开头或结尾，不要藏在中段",[46,1763,1764],{},"检索结果按相关度排序，最相关的放最前 \u002F 最后",[46,1766,1767],{},"长 history 周期性总结压缩，不要原文堆积",[46,1769,1770],{},"用「关键指令重复」——重要约束在 system prompt 开头说一遍、结尾再说一遍",[14,1772,1773],{"id":1773},"实战技巧",[95,1775,1777],{"id":1776},"cursor-的上下文管理","Cursor 的上下文管理",[19,1779,1780],{},"Cursor 在 Vibe Coding 时会智能组装上下文：",[43,1782,1783,1789,1795,1801],{},[46,1784,1785,1788],{},[110,1786,1787],{},"@file"," 引入特定文件",[46,1790,1791,1794],{},[110,1792,1793],{},"@folder"," 引入整个目录",[46,1796,1797,1800],{},[110,1798,1799],{},"@web"," 搜索网络内容",[46,1802,1803,1806],{},[110,1804,1805],{},"@docs"," 引入文档",[95,1808,1810],{"id":1809},"claude-code-的上下文策略","Claude Code 的上下文策略",[19,1812,1813],{},"Claude Code 在处理大型项目时：",[59,1815,1816,1826,1829,1832],{},[46,1817,1818,1819,562,1822,1825],{},"用 ",[110,1820,1821],{},"ls",[110,1823,1824],{},"grep"," 探索项目结构（不盲目加载全部文件）",[46,1827,1828],{},"只读取相关文件",[46,1830,1831],{},"用 CLAUDE.md 文件保存项目约定（每次自动加载）",[46,1833,1834],{},"长对话时自动压缩历史",[95,1836,1838],{"id":1837},"token-预算管理","Token 预算管理",[19,1840,1841],{},"200K 上下文不等于应该用满 200K。最佳实践：",[357,1843,1844,1854],{},[360,1845,1846],{},[363,1847,1848,1851],{},[366,1849,1850],{},"上下文用量",[366,1852,1853],{},"效果",[375,1855,1856,1864,1872,1880],{},[363,1857,1858,1861],{},[380,1859,1860],{},"\u003C 50%",[380,1862,1863],{},"最佳质量",[363,1865,1866,1869],{},[380,1867,1868],{},"50%-80%",[380,1870,1871],{},"轻微退化",[363,1873,1874,1877],{},[380,1875,1876],{},"80%-95%",[380,1878,1879],{},"明显退化（\"中间遗忘\"）",[363,1881,1882,1885],{},[380,1883,1884],{},"> 95%",[380,1886,1887],{},"严重退化、可能超限报错",[95,1889,1890],{"id":1890},"调试上下文",[19,1892,1893],{},"线上效果不对，先排查上下文而不是 prompt：",[59,1895,1896,1902,1908,1914],{},[46,1897,1898,1901],{},[23,1899,1900],{},"把实际发给模型的完整上下文 dump 出来","——很多框架会偷偷塞东西、截断中段。先看到真实输入。",[46,1903,1904,1907],{},[23,1905,1906],{},"数 token","——是不是已经过 80%？过了就先压缩。",[46,1909,1910,1913],{},[23,1911,1912],{},"关掉一半内容跑一遍","——如果效果不降反升，说明里面有噪音。",[46,1915,1916,1919],{},[23,1917,1918],{},"变换顺序","——把检索结果挪到 query 之后，对比效果。Claude 对\"指令在 context 之后\"特别敏感。",[14,1921,1923],{"id":1922},"与-prompt-engineering-的区别","与 Prompt Engineering 的区别",[357,1925,1926,1937],{},[360,1927,1928],{},[363,1929,1930,1932,1935],{},[366,1931,368],{},[366,1933,1934],{},"Prompt Engineering",[366,1936,1473],{},[375,1938,1939,1950,1961,1972,1983],{},[363,1940,1941,1944,1947],{},[380,1942,1943],{},"关注点",[380,1945,1946],{},"怎么写提示词",[380,1948,1949],{},"怎么组装全部上下文",[363,1951,1952,1955,1958],{},[380,1953,1954],{},"范围",[380,1956,1957],{},"单条 prompt",[380,1959,1960],{},"系统+工具+检索+历史+示例",[363,1962,1963,1966,1969],{},[380,1964,1965],{},"复杂度",[380,1967,1968],{},"简单",[380,1970,1971],{},"工程化",[363,1973,1974,1977,1980],{},[380,1975,1976],{},"适用",[380,1978,1979],{},"单次调用",[380,1981,1982],{},"Agent \u002F 复杂应用",[363,1984,1985,1988,1991],{},[380,1986,1987],{},"时代",[380,1989,1990],{},"2023",[380,1992,1993],{},"2025+",[19,1995,1996,347],{},[23,1997,1998],{},"Prompt Engineering 是 Context Engineering 的子集",[14,2000,733],{"id":733},[43,2002,2003,2010,2018,2029],{},[46,2004,2005,2006,2009],{},"基础：",[31,2007,1934],{"href":2008},"\u002Fwiki\u002Fprompt-engineering.html","——写好单条 prompt",[46,2011,2012,2013,2017],{},"计费视角：",[31,2014,2016],{"href":2015},"\u002Fwiki\u002Ftoken.html","Token","——上下文长短直接对应账单",[46,2019,2020,2021,600,2025],{},"上下文来源：",[31,2022,2024],{"href":2023},"\u002Fwiki\u002Frag.html","RAG",[31,2026,2028],{"href":2027},"\u002Fwiki\u002Fembedding.html","Embedding",[46,2030,2031,2032,2034],{},"Agent 上下文：",[31,2033,749],{"href":748},"中的 working memory",[760,2036,2037],{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":108,"searchDepth":140,"depth":140,"links":2039},[2040,2041,2042,2048,2049,2055,2056],{"id":1522,"depth":123,"text":1523},{"id":1539,"depth":123,"text":1539},{"id":1558,"depth":123,"text":1558,"children":2043},[2044,2045,2046,2047],{"id":1561,"depth":140,"text":1562},{"id":1574,"depth":140,"text":1575},{"id":1587,"depth":140,"text":1588},{"id":1661,"depth":140,"text":1662},{"id":1740,"depth":123,"text":1741},{"id":1773,"depth":123,"text":1773,"children":2050},[2051,2052,2053,2054],{"id":1776,"depth":140,"text":1777},{"id":1809,"depth":140,"text":1810},{"id":1837,"depth":140,"text":1838},{"id":1890,"depth":140,"text":1890},{"id":1922,"depth":123,"text":1923},{"id":733,"depth":123,"text":733},"methodology",{},"\u002Fwiki\u002Fcontext-engineering",[1502,1503],[1507,2062],"coding\u002Fide\u002Fcursor",{"title":1517,"description":108},"context-engineering","wiki\u002Fcontext-engineering","通过精心设计 prompt 上下文（系统提示、示例、检索结果、工具定义）来最大化 LLM 表现的工程方法论。",[1473,2068,1399,2069],"Prompt工程","方法论","Ql_hTVQ4HUubZybSU2_YQ8WZiRR-rfsnZUy5xYVnNrc",{"id":2072,"title":2073,"body":2074,"category":1498,"description":108,"extension":780,"meta":2828,"navigation":310,"path":2829,"published":783,"relatedModels":2830,"relatedTools":2831,"seo":2833,"slug":2834,"stem":2835,"summary":2836,"tags":2837,"updated":783,"__hash__":2840},"wiki\u002Fwiki\u002Fembedding.md","Embedding（向量嵌入）",{"type":11,"value":2075,"toc":2793},[2076,2080,2083,2089,2093,2096,2102,2104,2108,2111,2115,2118,2122,2129,2133,2136,2140,2251,2254,2273,2283,2286,2289,2338,2344,2348,2351,2411,2414,2418,2421,2497,2503,2507,2514,2518,2525,2531,2534,2538,2545,2556,2563,2566,2570,2573,2584,2588,2591,2595,2598,2602,2605,2616,2619,2711,2715,2718,2738,2741,2745,2748,2752,2755,2759,2766,2770,2773,2775],[14,2077,2079],{"id":2078},"什么是-embedding","什么是 Embedding",[19,2081,2082],{},"Embedding（向量嵌入）是把文本、图片、音频等数据转换成一组数字（向量）的过程。这组数字保留了原始数据的语义信息——语义相近的内容，向量距离也近。",[19,2084,2085,2086,347],{},"简单说：",[23,2087,2088],{},"把\"意思\"变成\"坐标\"",[14,2090,2092],{"id":2091},"为什么需要-embedding","为什么需要 Embedding",[19,2094,2095],{},"计算机不能直接理解\"猫和老虎比猫和桌子更像\"。但如果把每个词映射到高维空间中的一个点，让\"猫\"和\"老虎\"的坐标接近、\"猫\"和\"桌子\"的坐标远离，计算机就能通过计算距离来判断语义相似度。",[103,2097,2100],{"className":2098,"code":2099,"language":220},[218],"猫   → [0.21, -0.35, 0.88, ...]  ← 768 维向量\n老虎 → [0.19, -0.31, 0.91, ...]  ← 与\"猫\"距离很近\n桌子 → [-0.72, 0.55, -0.12, ...] ← 与\"猫\"距离很远\n",[110,2101,2099],{"__ignoreMap":108},[14,2103,93],{"id":93},[95,2105,2107],{"id":2106},"_1-文本-token","1. 文本 → Token",[19,2109,2110],{},"先把文本切分成 token（与 LLM 一样的分词方式）。",[95,2112,2114],{"id":2113},"_2-token-向量","2. Token → 向量",[19,2116,2117],{},"通过预训练的 Embedding 模型，把每个 token 映射到高维空间。",[95,2119,2121],{"id":2120},"_3-聚合","3. 聚合",[19,2123,2124,2125,2128],{},"把一段文本所有 token 的向量聚合成一个向量（通常用平均池化或特殊 ",[113,2126,2127],{},"CLS"," token）。",[95,2130,2132],{"id":2131},"_4-存储-检索","4. 存储 + 检索",[19,2134,2135],{},"把生成的向量存入向量数据库，查询时把问题也转成向量，找距离最近的。",[14,2137,2139],{"id":2138},"常见-embedding-模型","常见 Embedding 模型",[357,2141,2142,2156],{},[360,2143,2144],{},[363,2145,2146,2149,2151,2153],{},[366,2147,2148],{},"模型",[366,2150,368],{},[366,2152,1215],{},[366,2154,2155],{},"价格",[375,2157,2158,2172,2186,2200,2213,2226,2238],{},[363,2159,2160,2163,2166,2169],{},[380,2161,2162],{},"OpenAI text-embedding-3-large",[380,2164,2165],{},"3072",[380,2167,2168],{},"通用、稳定",[380,2170,2171],{},"$0.13\u002FM",[363,2173,2174,2177,2180,2183],{},[380,2175,2176],{},"OpenAI text-embedding-3-small",[380,2178,2179],{},"1536",[380,2181,2182],{},"性价比高",[380,2184,2185],{},"$0.02\u002FM",[363,2187,2188,2191,2194,2197],{},[380,2189,2190],{},"BGE-large-zh-v1.5",[380,2192,2193],{},"1024",[380,2195,2196],{},"中文最佳（开源）",[380,2198,2199],{},"免费",[363,2201,2202,2205,2207,2210],{},[380,2203,2204],{},"Jina Embeddings v3",[380,2206,2193],{},[380,2208,2209],{},"多语言",[380,2211,2212],{},"免费（开源）",[363,2214,2215,2218,2220,2223],{},[380,2216,2217],{},"Cohere embed v4",[380,2219,2193],{},[380,2221,2222],{},"多语言+多模态",[380,2224,2225],{},"$0.10\u002FM",[363,2227,2228,2231,2233,2236],{},[380,2229,2230],{},"GTE-large",[380,2232,2193],{},[380,2234,2235],{},"阿里出品（开源）",[380,2237,2199],{},[363,2239,2240,2243,2245,2248],{},[380,2241,2242],{},"voyage-code-3",[380,2244,2193],{},[380,2246,2247],{},"代码专用",[380,2249,2250],{},"$0.18\u002FM",[95,2252,2253],{"id":2253},"维度选择",[43,2255,2256,2262,2267],{},[46,2257,2258,2261],{},[23,2259,2260],{},"512-768"," — 精度低但速度快，适合大规模粗筛",[46,2263,2264,2266],{},[23,2265,2193],{}," — 平衡，最常用",[46,2268,2269,2272],{},[23,2270,2271],{},"1536-3072"," — 高精度，适合对召回质量要求高的场景",[19,2274,2275,2278,2279,2282],{},[110,2276,2277],{},"text-embedding-3"," 系列支持 ",[23,2280,2281],{},"Matryoshka","：训练时就让前 N 维独立可用，需要时直接截断到 256 \u002F 512 \u002F 1024，不用重新 embed。省存储省检索时间。",[14,2284,2285],{"id":2285},"相似度计算",[19,2287,2288],{},"两个向量之间的\"距离\"有几种计算方式：",[357,2290,2291,2303],{},[360,2292,2293],{},[363,2294,2295,2298,2301],{},[366,2296,2297],{},"方法",[366,2299,2300],{},"说明",[366,2302,1976],{},[375,2304,2305,2316,2327],{},[363,2306,2307,2310,2313],{},[380,2308,2309],{},"余弦相似度",[380,2311,2312],{},"最常用，衡量方向相似性",[380,2314,2315],{},"语义搜索（推荐）",[363,2317,2318,2321,2324],{},[380,2319,2320],{},"欧氏距离",[380,2322,2323],{},"衡量绝对距离",[380,2325,2326],{},"图像检索",[363,2328,2329,2332,2335],{},[380,2330,2331],{},"点积",[380,2333,2334],{},"最快，但受向量长度影响",[380,2336,2337],{},"大规模检索（需归一化）",[19,2339,2340,2343],{},[23,2341,2342],{},"推荐用余弦相似度","，它对向量长度不敏感，适合文本语义匹配。",[14,2345,2347],{"id":2346},"ann-索引百万向量怎么秒级查","ANN 索引：百万向量怎么秒级查",[19,2349,2350],{},"数据量小时（\u003C 10 万）暴力遍历就够。上百万级就必须用 ANN（Approximate Nearest Neighbor）索引：",[357,2352,2353,2365],{},[360,2354,2355],{},[363,2356,2357,2360,2363],{},[366,2358,2359],{},"索引",[366,2361,2362],{},"原理",[366,2364,1976],{},[375,2366,2367,2378,2389,2400],{},[363,2368,2369,2372,2375],{},[380,2370,2371],{},"HNSW",[380,2373,2374],{},"多层图结构跳跃查找",[380,2376,2377],{},"通用首选，召回率最高",[363,2379,2380,2383,2386],{},[380,2381,2382],{},"IVF",[380,2384,2385],{},"先聚类后局部搜索",[380,2387,2388],{},"上亿向量，内存敏感",[363,2390,2391,2394,2397],{},[380,2392,2393],{},"IVF-PQ",[380,2395,2396],{},"IVF + 乘积量化压缩",[380,2398,2399],{},"超大规模，能省 8-16x 内存",[363,2401,2402,2405,2408],{},[380,2403,2404],{},"Flat",[380,2406,2407],{},"暴力扫描",[380,2409,2410],{},"\u003C 10 万向量，要求 100% 召回",[19,2412,2413],{},"经验值：100 万 1024 维向量，HNSW 单机能做到 \u003C10ms 查询；上亿规模建议 IVF-PQ + 多机分片。",[14,2415,2417],{"id":2416},"chunk-size-的取舍","Chunk Size 的取舍",[19,2419,2420],{},"文档分块大小直接决定 RAG 质量，没有银弹，但有经验区间：",[357,2422,2423,2439],{},[360,2424,2425],{},[363,2426,2427,2430,2433,2436],{},[366,2428,2429],{},"chunk size",[366,2431,2432],{},"优点",[366,2434,2435],{},"缺点",[366,2437,2438],{},"适合",[375,2440,2441,2455,2469,2483],{},[363,2442,2443,2446,2449,2452],{},[380,2444,2445],{},"128-256 token",[380,2447,2448],{},"检索精度高",[380,2450,2451],{},"上下文不完整",[380,2453,2454],{},"FAQ、短问答",[363,2456,2457,2460,2463,2466],{},[380,2458,2459],{},"512 token",[380,2461,2462],{},"平衡，最常用",[380,2464,2465],{},"—",[380,2467,2468],{},"通用文档",[363,2470,2471,2474,2477,2480],{},[380,2472,2473],{},"1024 token",[380,2475,2476],{},"上下文完整",[380,2478,2479],{},"检索精度下降",[380,2481,2482],{},"长文档、技术手册",[363,2484,2485,2488,2491,2494],{},[380,2486,2487],{},"整章\u002F整页",[380,2489,2490],{},"语义完整",[380,2492,2493],{},"噪音多、贵",[380,2495,2496],{},"Late Chunking 场景",[19,2498,2499,2502],{},[23,2500,2501],{},"重叠（overlap）一般设 chunk size 的 10-20%","——避免关键句被切到两个 chunk 边界都丢失。",[95,2504,2506],{"id":2505},"late-chunking","Late Chunking",[19,2508,2509,2510,2513],{},"新思路：先 embedding 整篇文档，",[23,2511,2512],{},"然后再切","。让每个 chunk 的向量保留全文上下文。Jina v3 等模型原生支持，对长文档效果显著优于先切后 embed。",[14,2515,2517],{"id":2516},"混合检索bm25-向量","混合检索：BM25 + 向量",[19,2519,2520,2521,2524],{},"纯向量检索有死角——人名、型号、错别字、罕见专业术语，关键词匹配往往更准。生产 RAG 几乎都用",[23,2522,2523],{},"混合检索","：",[103,2526,2529],{"className":2527,"code":2528,"language":220},[218],"query\n  ├→ BM25 检索       → 候选集 A（关键词命中）\n  ├→ Vector 检索     → 候选集 B（语义命中）\n  ↓\n  RRF \u002F 加权融合     → top-K 候选\n  ↓\n  Reranker 二阶段排序 → 最终 top-N\n  ↓\n  喂给 LLM\n",[110,2530,2528],{"__ignoreMap":108},[19,2532,2533],{},"经验值：BM25 和向量按 0.5 \u002F 0.5 加权融合就能比纯向量提升 5-15% 召回率。",[14,2535,2537],{"id":2536},"reranker二阶段排序","Reranker：二阶段排序",[19,2539,2540,2541,2544],{},"向量检索快但粗。",[23,2542,2543],{},"Reranker（Cross-Encoder）"," 慢但准——把 query 和每个候选一起输入模型，输出相关性分数。流程：",[59,2546,2547,2550,2553],{},[46,2548,2549],{},"向量检索召回 top-100",[46,2551,2552],{},"Reranker 重排 top-100 → top-10",[46,2554,2555],{},"top-10 喂给 LLM",[19,2557,2558,2559,2562],{},"主流 Reranker：BGE-reranker-v2、Cohere Rerank 3、Jina Reranker v2。",[23,2560,2561],{},"Reranker 是 RAG 最高 ROI 的优化点之一","，加一步通常能让最终答案质量提升 10-20%。",[14,2564,2565],{"id":2565},"应用场景",[95,2567,2569],{"id":2568},"_1-rag检索增强生成","1. RAG（检索增强生成）",[19,2571,2572],{},"RAG 的核心步骤就是 Embedding：",[59,2574,2575,2578,2581],{},[46,2576,2577],{},"把知识库文档全部 Embedding → 存入向量数据库",[46,2579,2580],{},"用户提问 → Embedding → 在向量数据库找最相似的文档片段",[46,2582,2583],{},"把文档片段 + 问题一起发给 LLM → 生成回答",[95,2585,2587],{"id":2586},"_2-语义搜索","2. 语义搜索",[19,2589,2590],{},"传统搜索靠关键词匹配，搜\"手机\"找不到\"智能手机\"。语义搜索用 Embedding，搜\"手机\"能找到\"移动通信设备\"。",[95,2592,2594],{"id":2593},"_3-推荐系统","3. 推荐系统",[19,2596,2597],{},"把用户行为和内容都 Embedding，推荐与用户向量最近的内容。",[95,2599,2601],{"id":2600},"_4-去重与聚类","4. 去重与聚类",[19,2603,2604],{},"把文档 Embedding 后聚类，相似文档自动归为一类。用于：",[43,2606,2607,2610,2613],{},[46,2608,2609],{},"新闻去重",[46,2611,2612],{},"文档分类",[46,2614,2615],{},"知识图谱构建",[14,2617,2618],{"id":2618},"向量数据库",[357,2620,2621,2632],{},[360,2622,2623],{},[363,2624,2625,2628,2630],{},[366,2626,2627],{},"数据库",[366,2629,1215],{},[366,2631,426],{},[375,2633,2634,2645,2656,2667,2678,2689,2700],{},[363,2635,2636,2639,2642],{},[380,2637,2638],{},"Chroma",[380,2640,2641],{},"轻量、Python 原生",[380,2643,2644],{},"原型开发",[363,2646,2647,2650,2653],{},[380,2648,2649],{},"Qdrant",[380,2651,2652],{},"Rust 高性能、支持过滤",[380,2654,2655],{},"生产环境",[363,2657,2658,2661,2664],{},[380,2659,2660],{},"Milvus",[380,2662,2663],{},"分布式、亿级向量",[380,2665,2666],{},"企业级",[363,2668,2669,2672,2675],{},[380,2670,2671],{},"Pinecone",[380,2673,2674],{},"托管 SaaS、免运维",[380,2676,2677],{},"快速上线",[363,2679,2680,2683,2686],{},[380,2681,2682],{},"pgvector",[380,2684,2685],{},"PostgreSQL 扩展",[380,2687,2688],{},"已有 PG 的项目",[363,2690,2691,2694,2697],{},[380,2692,2693],{},"Weaviate",[380,2695,2696],{},"内置多模态",[380,2698,2699],{},"多模态搜索",[363,2701,2702,2705,2708],{},[380,2703,2704],{},"libsql \u002F sqlite-vec",[380,2706,2707],{},"SQLite 扩展",[380,2709,2710],{},"嵌入式 \u002F 边缘部署",[14,2712,2714],{"id":2713},"批量-embedding-的成本优化","批量 Embedding 的成本优化",[19,2716,2717],{},"百万级文档全量 embed 一次很贵。三招省钱：",[59,2719,2720,2726,2732],{},[46,2721,2722,2725],{},[23,2723,2724],{},"批量 API","——OpenAI \u002F Cohere 都有 batch API，价格直接打五折，24 小时内出结果。",[46,2727,2728,2731],{},[23,2729,2730],{},"本地开源模型","——BGE、GTE 在一张 4090 上跑 100 万段文本只要几小时，电费可忽略。",[46,2733,2734,2737],{},[23,2735,2736],{},"增量更新","——文档没变就别重新 embed，加 hash 去重。",[14,2739,2740],{"id":2740},"常见问题",[95,2742,2744],{"id":2743},"q-embedding-维度越高越好吗","Q: Embedding 维度越高越好吗",[19,2746,2747],{},"不是。高维度带来更高精度，但也带来更大存储和更慢检索。768-1536 维对大多数场景够用。",[95,2749,2751],{"id":2750},"q-不同语言的-embedding-能互相对比吗","Q: 不同语言的 Embedding 能互相对比吗",[19,2753,2754],{},"可以，前提是用了多语言 Embedding 模型（如 BGE-m3、Jina v3）。这样中文\"猫\"和英文\"cat\"的向量距离会很近。",[95,2756,2758],{"id":2757},"q-embedding-能理解代码吗","Q: Embedding 能理解代码吗",[19,2760,2761,2762,2765],{},"专门的代码 Embedding 模型（如 voyage-code-3、jina-embeddings-v2-code）可以。通用 Embedding 模型对代码的语义理解有限——会把所有 ",[110,2763,2764],{},"import os"," 都判定为高度相似。",[95,2767,2769],{"id":2768},"q-换-embedding-模型要重新跑全库吗","Q: 换 Embedding 模型要重新跑全库吗",[19,2771,2772],{},"是。向量空间不通用，不同模型生成的向量不能混用。这也是为什么生产环境上选模型要慎重——重新 embed 一次百万级文档不便宜。",[14,2774,733],{"id":733},[43,2776,2777,2783,2788],{},[46,2778,2779,2780],{},"应用架构：",[31,2781,2782],{"href":2023},"RAG（检索增强生成）",[46,2784,2785,2786],{},"上下文组装：",[31,2787,1473],{"href":1472},[46,2789,2790,2791],{},"计费基础：",[31,2792,2016],{"href":2015},{"title":108,"searchDepth":140,"depth":140,"links":2794},[2795,2796,2797,2803,2806,2807,2808,2811,2812,2813,2819,2820,2821,2827],{"id":2078,"depth":123,"text":2079},{"id":2091,"depth":123,"text":2092},{"id":93,"depth":123,"text":93,"children":2798},[2799,2800,2801,2802],{"id":2106,"depth":140,"text":2107},{"id":2113,"depth":140,"text":2114},{"id":2120,"depth":140,"text":2121},{"id":2131,"depth":140,"text":2132},{"id":2138,"depth":123,"text":2139,"children":2804},[2805],{"id":2253,"depth":140,"text":2253},{"id":2285,"depth":123,"text":2285},{"id":2346,"depth":123,"text":2347},{"id":2416,"depth":123,"text":2417,"children":2809},[2810],{"id":2505,"depth":140,"text":2506},{"id":2516,"depth":123,"text":2517},{"id":2536,"depth":123,"text":2537},{"id":2565,"depth":123,"text":2565,"children":2814},[2815,2816,2817,2818],{"id":2568,"depth":140,"text":2569},{"id":2586,"depth":140,"text":2587},{"id":2593,"depth":140,"text":2594},{"id":2600,"depth":140,"text":2601},{"id":2618,"depth":123,"text":2618},{"id":2713,"depth":123,"text":2714},{"id":2740,"depth":123,"text":2740,"children":2822},[2823,2824,2825,2826],{"id":2743,"depth":140,"text":2744},{"id":2750,"depth":140,"text":2751},{"id":2757,"depth":140,"text":2758},{"id":2768,"depth":140,"text":2769},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Fembedding",[1503],[787,2832],"agent\u002Fplatform\u002Ffastgpt",{"title":2073,"description":108},"embedding","wiki\u002Fembedding","把文本、图片等数据转成高维向量，让机器能通过向量距离衡量语义相似度——RAG、搜索、推荐的基础。",[2028,2838,2839,2024],"向量","语义搜索","HQZClqn97SO2Ug4d_0Qvn1QGq4je614hZOOL-XbFnG4",{"id":2842,"title":2843,"body":2844,"category":2057,"description":108,"extension":780,"meta":3446,"navigation":310,"path":3447,"published":783,"relatedModels":784,"relatedTools":3448,"seo":3449,"slug":3450,"stem":3451,"summary":3452,"tags":3453,"updated":783,"__hash__":3455},"wiki\u002Fwiki\u002Ffine-tuning-vs-rag.md","Fine-tuning vs RAG",{"type":11,"value":2845,"toc":3418},[2846,2849,2852,2865,2868,2871,3004,3008,3011,3079,3087,3091,3095,3098,3101,3105,3108,3111,3115,3118,3121,3125,3128,3131,3135,3139,3142,3146,3149,3153,3156,3160,3163,3167,3170,3239,3245,3249,3252,3313,3319,3322,3325,3344,3350,3353,3356,3360,3363,3367,3370,3374,3377,3381,3384,3387,3393,3395],[14,2847,2848],{"id":2848},"概述",[19,2850,2851],{},"当通用大模型不够用时，有两种主流方案让它适应特定场景：",[43,2853,2854,2860],{},[46,2855,2856,2859],{},[23,2857,2858],{},"Fine-tuning"," — 用领域数据重新训练模型，调整内部权重",[46,2861,2862,2864],{},[23,2863,2024],{}," — 不动模型，检索外部知识作为上下文",[19,2866,2867],{},"90% 的场景应该用 RAG。但什么时候该用 Fine-tuning？本文帮你判断。",[14,2869,2870],{"id":2870},"对比",[357,2872,2873,2883],{},[360,2874,2875],{},[363,2876,2877,2879,2881],{},[366,2878,368],{},[366,2880,2858],{},[366,2882,2024],{},[375,2884,2885,2895,2906,2917,2928,2939,2950,2961,2972,2982,2993],{},[363,2886,2887,2889,2892],{},[380,2888,2362],{},[380,2890,2891],{},"修改模型权重",[380,2893,2894],{},"检索外部知识",[363,2896,2897,2900,2903],{},[380,2898,2899],{},"知识更新",[380,2901,2902],{},"需重新训练",[380,2904,2905],{},"更新文档即可",[363,2907,2908,2911,2914],{},[380,2909,2910],{},"计算成本",[380,2912,2913],{},"高（GPU 训练）",[380,2915,2916],{},"低（检索+推理）",[363,2918,2919,2922,2925],{},[380,2920,2921],{},"数据需求",[380,2923,2924],{},"千条以上标注数据",[380,2926,2927],{},"文档即可",[363,2929,2930,2933,2936],{},[380,2931,2932],{},"延迟",[380,2934,2935],{},"不变",[380,2937,2938],{},"略增（检索时间）",[363,2940,2941,2944,2947],{},[380,2942,2943],{},"可解释性",[380,2945,2946],{},"低（黑盒）",[380,2948,2949],{},"高（能标注来源）",[363,2951,2952,2955,2958],{},[380,2953,2954],{},"幻觉控制",[380,2956,2957],{},"一般",[380,2959,2960],{},"好（有出处）",[363,2962,2963,2966,2969],{},[380,2964,2965],{},"风格调整",[380,2967,2968],{},"✅",[380,2970,2971],{},"❌",[363,2973,2974,2977,2979],{},[380,2975,2976],{},"格式调整",[380,2978,2968],{},[380,2980,2981],{},"⚠️ 靠 prompt",[363,2983,2984,2987,2990],{},[380,2985,2986],{},"事实性知识",[380,2988,2989],{},"❌ 不推荐",[380,2991,2992],{},"✅ 推荐",[363,2994,2995,2998,3001],{},[380,2996,2997],{},"私有数据",[380,2999,3000],{},"⚠️ 有泄露风险",[380,3002,3003],{},"✅ 数据在本地",[14,3005,3007],{"id":3006},"fine-tuning-的几种姿势","Fine-tuning 的几种姿势",[19,3009,3010],{},"业内说\"做 fine-tuning\"时通常指三种不同的事，混着用容易踩坑：",[357,3012,3013,3027],{},[360,3014,3015],{},[363,3016,3017,3019,3022,3025],{},[366,3018,1101],{},[366,3020,3021],{},"做什么",[366,3023,3024],{},"数据要求",[366,3026,2438],{},[375,3028,3029,3046,3063],{},[363,3030,3031,3037,3040,3043],{},[380,3032,3033,3036],{},[23,3034,3035],{},"SFT","（Supervised Fine-Tuning）",[380,3038,3039],{},"给输入-输出对，教模型新行为\u002F格式",[380,3041,3042],{},"千-万条标注",[380,3044,3045],{},"风格、格式、领域语言",[363,3047,3048,3054,3057,3060],{},[380,3049,3050,3053],{},[23,3051,3052],{},"DPO","（Direct Preference Optimization）",[380,3055,3056],{},"给\"更好的\"和\"更差的\"两个回答，让模型学偏好",[380,3058,3059],{},"数千 pair",[380,3061,3062],{},"对齐人类偏好、减少幻觉",[363,3064,3065,3070,3073,3076],{},[380,3066,3067],{},[23,3068,3069],{},"RLHF",[380,3071,3072],{},"DPO 的前身，用奖励模型 + PPO，工程链复杂",[380,3074,3075],{},"万级 + 奖励模型",[380,3077,3078],{},"大厂基座对齐",[19,3080,3081,3082,3086],{},"中小团队 90% 的「fine-tune」需求其实是 SFT。配合 ",[31,3083,3085],{"href":3084},"\u002Fwiki\u002Flora.html","LoRA"," \u002F QLoRA，单张 4090 就能跑动 7B-13B 模型的 SFT。",[14,3088,3090],{"id":3089},"什么时候用-fine-tuning","什么时候用 Fine-tuning",[95,3092,3094],{"id":3093},"_1-调整输出风格","1. 调整输出风格",[19,3096,3097],{},"模型需要特定的写作风格、语气、格式，而这些很难用 prompt 描述清楚。",[19,3099,3100],{},"例子：让模型模仿某个品牌的文案风格、生成特定格式的法律文书。",[95,3102,3104],{"id":3103},"_2-特定领域术语","2. 特定领域术语",[19,3106,3107],{},"模型对某个领域的专业术语理解不准确，需要大量领域数据训练。",[19,3109,3110],{},"例子：医学影像报告生成、法律条文引用。",[95,3112,3114],{"id":3113},"_3-降低推理成本","3. 降低推理成本",[19,3116,3117],{},"把 prompt 中的大量指令\"固化\"到模型权重中，减少每次推理的 token 用量。",[19,3119,3120],{},"例子：固定格式的客服分类、情感分析。",[95,3122,3124],{"id":3123},"_4-任务专精","4. 任务专精",[19,3126,3127],{},"模型需要在某个窄任务上达到极致表现，牺牲通用能力换取专业能力。",[19,3129,3130],{},"例子：代码补全专用模型、SQL 生成专用模型。",[14,3132,3134],{"id":3133},"什么时候用-rag","什么时候用 RAG",[95,3136,3138],{"id":3137},"_1-知识库问答","1. 知识库问答 ✅",[19,3140,3141],{},"企业内部文档、产品手册、FAQ 问答。这是 RAG 的主场。",[95,3143,3145],{"id":3144},"_2-实时信息","2. 实时信息 ✅",[19,3147,3148],{},"需要查最新数据（股价、天气、新闻）。Fine-tuning 无法解决知识时效性。",[95,3150,3152],{"id":3151},"_3-多源知识","3. 多源知识 ✅",[19,3154,3155],{},"需要综合多个文档来源回答问题。Fine-tuning 会把知识\"混\"在一起，RAG 能精确标注来源。",[95,3157,3159],{"id":3158},"_4-数据安全敏感","4. 数据安全敏感 ✅",[19,3161,3162],{},"数据不能进入模型权重（合规、隐私）。RAG 把数据放在外部知识库，可控制访问权限。",[14,3164,3166],{"id":3165},"主流-fine-tuning-服务对比","主流 Fine-tuning 服务对比",[19,3168,3169],{},"如果决定做 SFT，三条路线：",[357,3171,3172,3187],{},[360,3173,3174],{},[363,3175,3176,3179,3181,3184],{},[366,3177,3178],{},"路线",[366,3180,2438],{},[366,3182,3183],{},"代价",[366,3185,3186],{},"限制",[375,3188,3189,3205,3222],{},[363,3190,3191,3196,3199,3202],{},[380,3192,3193],{},[23,3194,3195],{},"OpenAI \u002F Anthropic Fine-tuning API",[380,3197,3198],{},"不想动 GPU、对 GPT\u002FClaude 系列做 SFT",[380,3200,3201],{},"训练按 token 计费，推理价格略高于基础模型",[380,3203,3204],{},"闭源模型，只能在原厂托管",[363,3206,3207,3213,3216,3219],{},[380,3208,3209,3212],{},[23,3210,3211],{},"自托管 LoRA","（LLaMA-Factory \u002F Axolotl）",[380,3214,3215],{},"开源模型（Qwen \u002F Llama \u002F DeepSeek）、私有部署",[380,3217,3218],{},"一张 A100 \u002F 4090 + 一周折腾",[380,3220,3221],{},"自己负责训练 + 推理基建",[363,3223,3224,3230,3233,3236],{},[380,3225,3226,3229],{},[23,3227,3228],{},"托管训练平台","（Together \u002F Anyscale \u002F 国内火山）",[380,3231,3232],{},"开源模型、不想自己搭训练环境",[380,3234,3235],{},"介于上面两者之间",[380,3237,3238],{},"模型权重归你，托管推理",[19,3240,1361,3241,3244],{},[23,3242,3243],{},"先试 prompt + RAG，搞不定再试 SFT，SFT 搞不定再试 DPO","。直接上 RLHF 是大厂才该做的事。",[14,3246,3248],{"id":3247},"成本量化粗略数量级","成本量化（粗略数量级）",[19,3250,3251],{},"按 100 万条对话场景估算：",[357,3253,3254,3267],{},[360,3255,3256],{},[363,3257,3258,3261,3264],{},[366,3259,3260],{},"方案",[366,3262,3263],{},"一次性成本",[366,3265,3266],{},"持续成本",[375,3268,3269,3280,3291,3302],{},[363,3270,3271,3274,3277],{},[380,3272,3273],{},"Prompt + RAG",[380,3275,3276],{},"几百块（向量库 + embed）",[380,3278,3279],{},"按调用次数 × token 价",[363,3281,3282,3285,3288],{},[380,3283,3284],{},"LoRA SFT（开源 7B）",[380,3286,3287],{},"几千-几万（GPU 租用 + 数据标注）",[380,3289,3290],{},"推理便宜，但要自己运维",[363,3292,3293,3296,3299],{},[380,3294,3295],{},"全量微调（7B 闭源 API）",[380,3297,3298],{},"几万-几十万（标注 + 训练费）",[380,3300,3301],{},"推理价 ≈ 基础模型的 1.5-3x",[363,3303,3304,3307,3310],{},[380,3305,3306],{},"全量微调（70B 自托管）",[380,3308,3309],{},"十万级起",[380,3311,3312],{},"持续 GPU 集群",[19,3314,3315,3318],{},[23,3316,3317],{},"先算账再决定","——很多团队 fine-tune 完一年的推理增量成本就超过了节省的 prompt token 钱。",[14,3320,3321],{"id":3321},"可以组合使用",[19,3323,3324],{},"Fine-tuning 和 RAG 不是互斥的。最佳实践：",[59,3326,3327,3333,3338],{},[46,3328,3329,3332],{},[23,3330,3331],{},"Fine-tune"," 调整模型风格和格式",[46,3334,3335,3337],{},[23,3336,2024],{}," 提供事实性知识",[46,3339,3340,3343],{},[23,3341,3342],{},"Prompt"," 控制行为规范",[103,3345,3348],{"className":3346,"code":3347,"language":220},[218],"用户提问\n  ↓\nRAG 检索相关文档\n  ↓\nFine-tuned 模型（已有正确风格）\n  + 检索到的文档上下文\n  + System Prompt（行为规范）\n  → 生成回答\n",[110,3349,3347],{"__ignoreMap":108},[19,3351,3352],{},"真实案例：客服场景——用 SFT 教模型\"用什么语气说话\"和\"什么不能承诺\"，用 RAG 提供商品\u002F订单实时数据。两者分工明确，单独用任何一个都做不好。",[14,3354,3355],{"id":3355},"常见误区",[95,3357,3359],{"id":3358},"误区-1-我们的数据很特殊必须-fine-tune","误区 1: \"我们的数据很特殊，必须 fine-tune\"",[19,3361,3362],{},"大多数情况下，你的数据只是\"模型没见过\"而非\"模型理解不了\"。RAG 就能让模型看到你的数据。",[95,3364,3366],{"id":3365},"误区-2-fine-tuning-会让模型更聪明","误区 2: \"Fine-tuning 会让模型更聪明\"",[19,3368,3369],{},"Fine-tuning 调整的是行为模式，不是知识。模型不会因为 fine-tuning 就变得更擅长推理。",[95,3371,3373],{"id":3372},"误区-3-fine-tuning-后不需要-prompt-了","误区 3: \"Fine-tuning 后不需要 prompt 了\"",[19,3375,3376],{},"Fine-tuning 后仍需要好的 prompt。Fine-tuning 只是让模型在特定模式下更可靠。",[95,3378,3380],{"id":3379},"误区-4-fine-tune-一次就一劳永逸","误区 4: \"Fine-tune 一次就一劳永逸\"",[19,3382,3383],{},"模型升级（GPT-4 → GPT-5、Claude 3 → Claude 4），你的 fine-tune 模型还停在旧基座。要么不升级享受不到新能力，要么重新跑训练流程。RAG 没这问题。",[14,3385,3386],{"id":3386},"决策树",[103,3388,3391],{"className":3389,"code":3390,"language":220},[218],"你的需求是什么？\n├─ 让模型知道新知识 → RAG\n├─ 让模型改变输出风格 → Fine-tuning（SFT）\n├─ 让模型理解领域术语 → 先 RAG（喂术语表），不行再 SFT\n├─ 让模型用特定格式输出 → 先 Structured Output \u002F Prompt，不行再 SFT\n├─ 让模型基于私有数据回答 → RAG\n├─ 减少幻觉 \u002F 对齐偏好 → DPO\n└─ 以上多个 → RAG + SFT 组合\n",[110,3392,3390],{"__ignoreMap":108},[14,3394,733],{"id":733},[43,3396,3397,3403,3408,3413],{},[46,3398,3399,3400],{},"高效微调：",[31,3401,3402],{"href":3084},"LoRA（低秩适配）",[46,3404,3405,3406],{},"检索架构：",[31,3407,2782],{"href":2023},[46,3409,3410,3411],{},"数据准备：",[31,3412,2028],{"href":2027},[46,3414,3415,3416],{},"控制输出：",[31,3417,1934],{"href":2008},{"title":108,"searchDepth":140,"depth":140,"links":3419},[3420,3421,3422,3423,3429,3435,3436,3437,3438,3444,3445],{"id":2848,"depth":123,"text":2848},{"id":2870,"depth":123,"text":2870},{"id":3006,"depth":123,"text":3007},{"id":3089,"depth":123,"text":3090,"children":3424},[3425,3426,3427,3428],{"id":3093,"depth":140,"text":3094},{"id":3103,"depth":140,"text":3104},{"id":3113,"depth":140,"text":3114},{"id":3123,"depth":140,"text":3124},{"id":3133,"depth":123,"text":3134,"children":3430},[3431,3432,3433,3434],{"id":3137,"depth":140,"text":3138},{"id":3144,"depth":140,"text":3145},{"id":3151,"depth":140,"text":3152},{"id":3158,"depth":140,"text":3159},{"id":3165,"depth":123,"text":3166},{"id":3247,"depth":123,"text":3248},{"id":3321,"depth":123,"text":3321},{"id":3355,"depth":123,"text":3355,"children":3439},[3440,3441,3442,3443],{"id":3358,"depth":140,"text":3359},{"id":3365,"depth":140,"text":3366},{"id":3372,"depth":140,"text":3373},{"id":3379,"depth":140,"text":3380},{"id":3386,"depth":123,"text":3386},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Ffine-tuning-vs-rag",[787,2832],{"title":2843,"description":108},"fine-tuning-vs-rag","wiki\u002Ffine-tuning-vs-rag","两种让大模型适应特定场景的方法对比：Fine-tuning 修改模型权重，RAG 引入外部知识。大多数场景应该用 RAG。",[2858,2024,3454,2069],"模型微调","7woP_8rBJsgZtuRZpWqq8ZQ4Dg4j8-07DICupRLownE",{"id":3457,"title":3458,"body":3459,"category":1498,"description":108,"extension":780,"meta":4427,"navigation":310,"path":4428,"published":783,"relatedModels":4429,"relatedTools":4431,"seo":4432,"slug":4433,"stem":4434,"summary":4435,"tags":4436,"updated":783,"__hash__":4437},"wiki\u002Fwiki\u002Ffunction-calling.md","Function Calling（函数调用）",{"type":11,"value":3460,"toc":4401},[3461,3465,3468,3474,3483,3485,3489,3492,3610,3614,3617,3682,3686,3689,3718,3722,3725,3729,3732,3770,3777,3795,3799,3802,3858,3864,3868,3874,3877,3888,3892,3987,3996,3999,4003,4006,4165,4168,4171,4182,4185,4196,4198,4202,4231,4235,4264,4268,4271,4301,4304,4308,4311,4322,4325,4328,4378,4380,4398],[14,3462,3464],{"id":3463},"什么是-function-calling","什么是 Function Calling",[19,3466,3467],{},"Function Calling（函数调用）是让大模型调用外部函数的能力。你告诉模型有哪些函数可用，模型根据用户意图决定调用哪个函数、传什么参数。",[103,3469,3472],{"className":3470,"code":3471,"language":220},[218],"用户：\"上海今天天气怎么样？\"\n  ↓\n模型分析：需要查天气 → 调用 get_weather(\"上海\")\n  ↓\n你的代码执行 get_weather(\"上海\") → 返回 { temp: 28°C, condition: \"多云\" }\n  ↓\n模型基于返回结果生成回答：\"上海今天 28°C，多云，适合出行。\"\n",[110,3473,3471],{"__ignoreMap":108},[86,3475,3476],{},[19,3477,3478,3479,3482],{},"注意：模型",[23,3480,3481],{},"自己不能执行函数","。它只输出\"我想调 get_weather('上海')\"这条意图，真正的 HTTP 请求 \u002F DB 查询是你的应用代码去跑。这是 Function Calling 最容易被误解的一点。",[14,3484,93],{"id":93},[95,3486,3488],{"id":3487},"_1-定义函数","1. 定义函数",[19,3490,3491],{},"你向模型提供函数的 JSON Schema 描述：",[103,3493,3495],{"className":105,"code":3494,"language":107,"meta":108,"style":108},"{\n  \"name\": \"get_weather\",\n  \"description\": \"查询指定城市的天气\",\n  \"parameters\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"city\": {\n        \"type\": \"string\",\n        \"description\": \"城市名，如'上海'\"\n      }\n    },\n    \"required\": [\"city\"]\n  }\n}\n",[110,3496,3497,3501,3512,3523,3531,3543,3550,3557,3569,3579,3584,3589,3602,3606],{"__ignoreMap":108},[113,3498,3499],{"class":115,"line":116},[113,3500,120],{"class":119},[113,3502,3503,3505,3507,3510],{"class":115,"line":123},[113,3504,127],{"class":126},[113,3506,130],{"class":119},[113,3508,3509],{"class":133},"\"get_weather\"",[113,3511,137],{"class":119},[113,3513,3514,3516,3518,3521],{"class":115,"line":140},[113,3515,143],{"class":126},[113,3517,130],{"class":119},[113,3519,3520],{"class":133},"\"查询指定城市的天气\"",[113,3522,137],{"class":119},[113,3524,3525,3528],{"class":115,"line":153},[113,3526,3527],{"class":126},"  \"parameters\"",[113,3529,3530],{"class":119},": {\n",[113,3532,3533,3536,3538,3541],{"class":115,"line":174},[113,3534,3535],{"class":126},"    \"type\"",[113,3537,130],{"class":119},[113,3539,3540],{"class":133},"\"object\"",[113,3542,137],{"class":119},[113,3544,3545,3548],{"class":115,"line":187},[113,3546,3547],{"class":126},"    \"properties\"",[113,3549,3530],{"class":119},[113,3551,3552,3555],{"class":115,"line":5},[113,3553,3554],{"class":126},"      \"city\"",[113,3556,3530],{"class":119},[113,3558,3559,3562,3564,3567],{"class":115,"line":272},[113,3560,3561],{"class":126},"        \"type\"",[113,3563,130],{"class":119},[113,3565,3566],{"class":133},"\"string\"",[113,3568,137],{"class":119},[113,3570,3571,3574,3576],{"class":115,"line":278},[113,3572,3573],{"class":126},"        \"description\"",[113,3575,130],{"class":119},[113,3577,3578],{"class":133},"\"城市名，如'上海'\"\n",[113,3580,3581],{"class":115,"line":284},[113,3582,3583],{"class":119},"      }\n",[113,3585,3586],{"class":115,"line":290},[113,3587,3588],{"class":119},"    },\n",[113,3590,3591,3594,3596,3599],{"class":115,"line":296},[113,3592,3593],{"class":126},"    \"required\"",[113,3595,159],{"class":119},[113,3597,3598],{"class":133},"\"city\"",[113,3600,3601],{"class":119},"]\n",[113,3603,3604],{"class":115,"line":302},[113,3605,299],{"class":119},[113,3607,3608],{"class":115,"line":307},[113,3609,200],{"class":119},[95,3611,3613],{"id":3612},"_2-模型决定调用","2. 模型决定调用",[19,3615,3616],{},"模型根据用户输入，决定是否需要调用函数：",[103,3618,3620],{"className":105,"code":3619,"language":107,"meta":108,"style":108},"{\n  \"function_call\": {\n    \"name\": \"get_weather\",\n    \"arguments\": \"{\\\"city\\\": \\\"上海\\\"}\"\n  }\n}\n",[110,3621,3622,3626,3633,3644,3674,3678],{"__ignoreMap":108},[113,3623,3624],{"class":115,"line":116},[113,3625,120],{"class":119},[113,3627,3628,3631],{"class":115,"line":123},[113,3629,3630],{"class":126},"  \"function_call\"",[113,3632,3530],{"class":119},[113,3634,3635,3638,3640,3642],{"class":115,"line":140},[113,3636,3637],{"class":126},"    \"name\"",[113,3639,130],{"class":119},[113,3641,3509],{"class":133},[113,3643,137],{"class":119},[113,3645,3646,3649,3651,3654,3657,3660,3662,3664,3666,3669,3671],{"class":115,"line":153},[113,3647,3648],{"class":126},"    \"arguments\"",[113,3650,130],{"class":119},[113,3652,3653],{"class":133},"\"{",[113,3655,3656],{"class":126},"\\\"",[113,3658,3659],{"class":133},"city",[113,3661,3656],{"class":126},[113,3663,130],{"class":133},[113,3665,3656],{"class":126},[113,3667,3668],{"class":133},"上海",[113,3670,3656],{"class":126},[113,3672,3673],{"class":133},"}\"\n",[113,3675,3676],{"class":115,"line":174},[113,3677,299],{"class":119},[113,3679,3680],{"class":115,"line":187},[113,3681,200],{"class":119},[95,3683,3685],{"id":3684},"_3-你执行函数","3. 你执行函数",[19,3687,3688],{},"你的代码执行实际函数调用，返回结果给模型：",[103,3690,3694],{"className":3691,"code":3692,"language":3693,"meta":108,"style":108},"language-python shiki shiki-themes github-light github-dark","result = get_weather(\"上海\")  # { temp: 28, condition: \"多云\" }\n","python",[110,3695,3696],{"__ignoreMap":108},[113,3697,3698,3701,3705,3708,3711,3714],{"class":115,"line":116},[113,3699,3700],{"class":119},"result ",[113,3702,3704],{"class":3703},"szBVR","=",[113,3706,3707],{"class":119}," get_weather(",[113,3709,3710],{"class":133},"\"上海\"",[113,3712,3713],{"class":119},")  ",[113,3715,3717],{"class":3716},"sJ8bj","# { temp: 28, condition: \"多云\" }\n",[95,3719,3721],{"id":3720},"_4-模型生成最终回答","4. 模型生成最终回答",[19,3723,3724],{},"模型基于函数返回结果生成自然语言回答。",[14,3726,3728],{"id":3727},"parallel-tool-calls一次调多个","Parallel Tool Calls：一次调多个",[19,3730,3731],{},"现代模型（GPT-4o+、Claude Sonnet 4+、Gemini 2.5+）支持单轮内发起多个工具调用。例如用户问\"对比上海和北京的天气\"：",[103,3733,3735],{"className":231,"code":3734,"language":233,"meta":108,"style":108},"\u002F\u002F 模型一次返回两个 tool_call\n{\n  \"tool_calls\": [\n    { \"id\": \"t1\", \"function\": { \"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\":\\\"上海\\\"}\" } },\n    { \"id\": \"t2\", \"function\": { \"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\":\\\"北京\\\"}\" } }\n  ]\n}\n",[110,3736,3737,3742,3746,3751,3756,3761,3766],{"__ignoreMap":108},[113,3738,3739],{"class":115,"line":116},[113,3740,3741],{},"\u002F\u002F 模型一次返回两个 tool_call\n",[113,3743,3744],{"class":115,"line":123},[113,3745,120],{},[113,3747,3748],{"class":115,"line":140},[113,3749,3750],{},"  \"tool_calls\": [\n",[113,3752,3753],{"class":115,"line":153},[113,3754,3755],{},"    { \"id\": \"t1\", \"function\": { \"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\":\\\"上海\\\"}\" } },\n",[113,3757,3758],{"class":115,"line":174},[113,3759,3760],{},"    { \"id\": \"t2\", \"function\": { \"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\":\\\"北京\\\"}\" } }\n",[113,3762,3763],{"class":115,"line":187},[113,3764,3765],{},"  ]\n",[113,3767,3768],{"class":115,"line":5},[113,3769,200],{},[19,3771,3772,3773,3776],{},"应用层可以",[23,3774,3775],{},"并行执行","这两个调用，把两个结果一起回给模型。能省一轮往返延迟。陷阱：",[43,3778,3779,3786,3792],{},[46,3780,3781,3782,3785],{},"不是所有 API 都默认开启，要看 ",[110,3783,3784],{},"parallel_tool_calls"," 参数",[46,3787,3788,3791],{},[23,3789,3790],{},"并行的调用之间不能有依赖","（B 的输入需要 A 的输出就不能并行）",[46,3793,3794],{},"模型对\"什么时候适合并行\"判断不总是对——独立查询通常 OK，有顺序的操作（先建用户再发通知）会被错并行",[14,3796,3798],{"id":3797},"structured-outputs-json-mode","Structured Outputs \u002F JSON Mode",[19,3800,3801],{},"Function Calling 的近亲：让模型保证按指定 JSON Schema 输出。区别是不调函数、就要结构化数据本身。",[357,3803,3804,3812],{},[360,3805,3806],{},[363,3807,3808,3810],{},[366,3809,1209],{},[366,3811,1676],{},[375,3813,3814,3825,3836,3844],{},[363,3815,3816,3819],{},[380,3817,3818],{},"OpenAI Structured Outputs",[380,3820,3821,3824],{},[110,3822,3823],{},"response_format: { type: \"json_schema\", strict: true }","，保证 100% 符合 schema",[363,3826,3827,3830],{},[380,3828,3829],{},"OpenAI JSON Mode",[380,3831,3832,3835],{},[110,3833,3834],{},"response_format: { type: \"json_object\" }","，只保证是合法 JSON，结构不保证",[363,3837,3838,3841],{},[380,3839,3840],{},"Anthropic Tool Use",[380,3842,3843],{},"用 tool definition 当 schema 模板，模型一定按 schema 填",[363,3845,3846,3849],{},[380,3847,3848],{},"Google Gemini",[380,3850,3851,3854,3855],{},[110,3852,3853],{},"responseMimeType: \"application\u002Fjson\""," + ",[110,3856,3857],{},"responseSchema",[19,3859,3860,3863],{},[23,3861,3862],{},"用途","：从非结构化文本提取结构化信息（PDF 解析、表单填写、分类打标），比 prompt 让模型\"输出 JSON\"稳定得多。",[14,3865,3867],{"id":3866},"与-mcp-的关系","与 MCP 的关系",[19,3869,3870,3871,3873],{},"Function Calling 是模型层面的能力（模型决定调什么函数）。\n",[31,3872,34],{"href":33}," 是协议层面的标准（标准化函数发现和调用的方式）。",[19,3875,3876],{},"MCP 底层依赖 Function Calling，但提供了更完整的生态：",[43,3878,3879,3882,3885],{},[46,3880,3881],{},"动态发现 Server 能力",[46,3883,3884],{},"标准化的工具\u002F资源\u002F提示词暴露方式",[46,3886,3887],{},"跨工具复用",[14,3889,3891],{"id":3890},"主流模型-fc-能力对比","主流模型 FC 能力对比",[357,3893,3894,3909],{},[360,3895,3896],{},[363,3897,3898,3900,3903,3906],{},[366,3899,2148],{},[366,3901,3902],{},"Parallel",[366,3904,3905],{},"Strict Schema",[366,3907,3908],{},"工具数上限（实测稳定）",[375,3910,3911,3924,3937,3950,3963,3974],{},[363,3912,3913,3916,3918,3921],{},[380,3914,3915],{},"GPT-5 \u002F GPT-4o",[380,3917,2968],{},[380,3919,3920],{},"✅ Structured Outputs",[380,3922,3923],{},"100+",[363,3925,3926,3929,3931,3934],{},[380,3927,3928],{},"Claude Sonnet 4",[380,3930,2968],{},[380,3932,3933],{},"✅ Tool Use",[380,3935,3936],{},"50-100",[363,3938,3939,3942,3944,3947],{},[380,3940,3941],{},"Gemini 2.5 Pro",[380,3943,2968],{},[380,3945,3946],{},"✅ responseSchema",[380,3948,3949],{},"50+",[363,3951,3952,3955,3957,3960],{},[380,3953,3954],{},"DeepSeek V3",[380,3956,2968],{},[380,3958,3959],{},"部分",[380,3961,3962],{},"30-50",[363,3964,3965,3968,3970,3972],{},[380,3966,3967],{},"GLM-5",[380,3969,2968],{},[380,3971,3959],{},[380,3973,3962],{},[363,3975,3976,3979,3982,3984],{},[380,3977,3978],{},"早期开源（Llama 3 等）",[380,3980,3981],{},"⚠️ 部分",[380,3983,2971],{},[380,3985,3986],{},"10-20",[19,3988,3989,3992,3993,3995],{},[23,3990,3991],{},"经验","：工具列表超过 20-30 个，所有模型的选择准确率都会下降。这是为什么 ",[31,3994,795],{"href":748}," 设计里常见\"工具分组 \u002F 分层路由\"——先选一个工具组，再在组内选具体工具。",[14,3997,3998],{"id":3998},"实际应用",[95,4000,4002],{"id":4001},"ai-编程工具","AI 编程工具",[19,4004,4005],{},"Cursor \u002F Claude Code 的 function calling：",[103,4007,4009],{"className":105,"code":4008,"language":107,"meta":108,"style":108},"{\n  \"name\": \"read_file\",\n  \"description\": \"读取文件内容\",\n  \"parameters\": { \"path\": \"string\" }\n}\n{\n  \"name\": \"edit_file\",\n  \"description\": \"编辑文件\",\n  \"parameters\": { \"path\": \"string\", \"old\": \"string\", \"new\": \"string\" }\n}\n{\n  \"name\": \"run_terminal\",\n  \"description\": \"执行终端命令\",\n  \"parameters\": { \"command\": \"string\" }\n}\n",[110,4010,4011,4015,4026,4037,4054,4058,4062,4073,4084,4116,4120,4124,4135,4146,4161],{"__ignoreMap":108},[113,4012,4013],{"class":115,"line":116},[113,4014,120],{"class":119},[113,4016,4017,4019,4021,4024],{"class":115,"line":123},[113,4018,127],{"class":126},[113,4020,130],{"class":119},[113,4022,4023],{"class":133},"\"read_file\"",[113,4025,137],{"class":119},[113,4027,4028,4030,4032,4035],{"class":115,"line":140},[113,4029,143],{"class":126},[113,4031,130],{"class":119},[113,4033,4034],{"class":133},"\"读取文件内容\"",[113,4036,137],{"class":119},[113,4038,4039,4041,4044,4047,4049,4051],{"class":115,"line":153},[113,4040,3527],{"class":126},[113,4042,4043],{"class":119},": { ",[113,4045,4046],{"class":126},"\"path\"",[113,4048,130],{"class":119},[113,4050,3566],{"class":133},[113,4052,4053],{"class":119}," }\n",[113,4055,4056],{"class":115,"line":174},[113,4057,200],{"class":119},[113,4059,4060],{"class":115,"line":187},[113,4061,120],{"class":119},[113,4063,4064,4066,4068,4071],{"class":115,"line":5},[113,4065,127],{"class":126},[113,4067,130],{"class":119},[113,4069,4070],{"class":133},"\"edit_file\"",[113,4072,137],{"class":119},[113,4074,4075,4077,4079,4082],{"class":115,"line":272},[113,4076,143],{"class":126},[113,4078,130],{"class":119},[113,4080,4081],{"class":133},"\"编辑文件\"",[113,4083,137],{"class":119},[113,4085,4086,4088,4090,4092,4094,4096,4098,4101,4103,4105,4107,4110,4112,4114],{"class":115,"line":278},[113,4087,3527],{"class":126},[113,4089,4043],{"class":119},[113,4091,4046],{"class":126},[113,4093,130],{"class":119},[113,4095,3566],{"class":133},[113,4097,165],{"class":119},[113,4099,4100],{"class":126},"\"old\"",[113,4102,130],{"class":119},[113,4104,3566],{"class":133},[113,4106,165],{"class":119},[113,4108,4109],{"class":126},"\"new\"",[113,4111,130],{"class":119},[113,4113,3566],{"class":133},[113,4115,4053],{"class":119},[113,4117,4118],{"class":115,"line":284},[113,4119,200],{"class":119},[113,4121,4122],{"class":115,"line":290},[113,4123,120],{"class":119},[113,4125,4126,4128,4130,4133],{"class":115,"line":296},[113,4127,127],{"class":126},[113,4129,130],{"class":119},[113,4131,4132],{"class":133},"\"run_terminal\"",[113,4134,137],{"class":119},[113,4136,4137,4139,4141,4144],{"class":115,"line":302},[113,4138,143],{"class":126},[113,4140,130],{"class":119},[113,4142,4143],{"class":133},"\"执行终端命令\"",[113,4145,137],{"class":119},[113,4147,4148,4150,4152,4155,4157,4159],{"class":115,"line":307},[113,4149,3527],{"class":126},[113,4151,4043],{"class":119},[113,4153,4154],{"class":126},"\"command\"",[113,4156,130],{"class":119},[113,4158,3566],{"class":133},[113,4160,4053],{"class":119},[113,4162,4163],{"class":115,"line":314},[113,4164,200],{"class":119},[95,4166,1193],{"id":4167},"agent-平台",[19,4169,4170],{},"Coze \u002F Dify 中的自定义工具就是 function calling：",[43,4172,4173,4176,4179],{},[46,4174,4175],{},"定义工具的输入输出",[46,4177,4178],{},"模型自动编排调用顺序",[46,4180,4181],{},"支持多步工具链",[95,4183,4184],{"id":4184},"企业应用",[43,4186,4187,4190,4193],{},[46,4188,4189],{},"查询数据库（自然语言 → SQL → 结果）",[46,4191,4192],{},"调用内部 API（ERP\u002FCRM\u002FOA）",[46,4194,4195],{},"发送通知（邮件\u002F钉钉\u002F飞书）",[14,4197,1403],{"id":1403},[95,4199,4201],{"id":4200},"_1-描述要清晰","1. 描述要清晰",[103,4203,4205],{"className":231,"code":4204,"language":233,"meta":108,"style":108},"\u002F\u002F ❌ 模糊\n{ \"name\": \"search\", \"description\": \"搜索\" }\n\n\u002F\u002F ✅ 清晰\n{ \"name\": \"search_docs\", \"description\": \"在企业知识库中全文搜索文档，返回最相关的 5 条结果\" }\n",[110,4206,4207,4212,4217,4221,4226],{"__ignoreMap":108},[113,4208,4209],{"class":115,"line":116},[113,4210,4211],{},"\u002F\u002F ❌ 模糊\n",[113,4213,4214],{"class":115,"line":123},[113,4215,4216],{},"{ \"name\": \"search\", \"description\": \"搜索\" }\n",[113,4218,4219],{"class":115,"line":140},[113,4220,311],{"emptyLinePlaceholder":310},[113,4222,4223],{"class":115,"line":153},[113,4224,4225],{},"\u002F\u002F ✅ 清晰\n",[113,4227,4228],{"class":115,"line":174},[113,4229,4230],{},"{ \"name\": \"search_docs\", \"description\": \"在企业知识库中全文搜索文档，返回最相关的 5 条结果\" }\n",[95,4232,4234],{"id":4233},"_2-参数类型要明确","2. 参数类型要明确",[103,4236,4238],{"className":231,"code":4237,"language":233,"meta":108,"style":108},"\u002F\u002F ❌ 不清楚枚举值\n{ \"unit\": { \"type\": \"string\" } }\n\n\u002F\u002F ✅ 明确枚举\n{ \"unit\": { \"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"] } }\n",[110,4239,4240,4245,4250,4254,4259],{"__ignoreMap":108},[113,4241,4242],{"class":115,"line":116},[113,4243,4244],{},"\u002F\u002F ❌ 不清楚枚举值\n",[113,4246,4247],{"class":115,"line":123},[113,4248,4249],{},"{ \"unit\": { \"type\": \"string\" } }\n",[113,4251,4252],{"class":115,"line":140},[113,4253,311],{"emptyLinePlaceholder":310},[113,4255,4256],{"class":115,"line":153},[113,4257,4258],{},"\u002F\u002F ✅ 明确枚举\n",[113,4260,4261],{"class":115,"line":174},[113,4262,4263],{},"{ \"unit\": { \"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"] } }\n",[95,4265,4267],{"id":4266},"_3-提供错误处理","3. 提供错误处理",[19,4269,4270],{},"函数执行失败时，返回结构化错误让模型理解：",[103,4272,4274],{"className":105,"code":4273,"language":107,"meta":108,"style":108},"{ \"error\": \"city_not_found\", \"message\": \"找不到城市'上海'\" }\n",[110,4275,4276],{"__ignoreMap":108},[113,4277,4278,4281,4284,4286,4289,4291,4294,4296,4299],{"class":115,"line":116},[113,4279,4280],{"class":119},"{ ",[113,4282,4283],{"class":126},"\"error\"",[113,4285,130],{"class":119},[113,4287,4288],{"class":133},"\"city_not_found\"",[113,4290,165],{"class":119},[113,4292,4293],{"class":126},"\"message\"",[113,4295,130],{"class":119},[113,4297,4298],{"class":133},"\"找不到城市'上海'\"",[113,4300,4053],{"class":119},[19,4302,4303],{},"模型看到结构化错误能自己修正（比如把\"上海\"补成\"上海市\"再试一次）；看到 HTTP 500 那种 stack trace 反而容易卡住。",[95,4305,4307],{"id":4306},"_4-限制函数数量","4. 限制函数数量",[19,4309,4310],{},"一次提供太多函数会让模型困惑。建议：",[43,4312,4313,4316,4319],{},[46,4314,4315],{},"核心函数 5-10 个",[46,4317,4318],{},"用 Agent 模式分步调用",[46,4320,4321],{},"或用 MCP 动态发现",[14,4323,4324],{"id":4324},"调试技巧",[19,4326,4327],{},"线上 FC 不稳定时，按顺序排查：",[59,4329,4330,4340,4353,4362,4372],{},[46,4331,4332,4335,4336,4339],{},[23,4333,4334],{},"看模型是不是真选错了","——把 ",[110,4337,4338],{},"tool_calls"," 完整 dump 出来。常见情况是模型选对了但参数错了。",[46,4341,4342,4335,4345,4348,4349,347],{},[23,4343,4344],{},"降温到 0",[110,4346,4347],{},"temperature"," 设 0 让结果可复现，再调 prompt 和 schema。详见 ",[31,4350,4352],{"href":4351},"\u002Fwiki\u002Ftemperature-top-p.html","Temperature 与 Top-P",[46,4354,4355,558,4358,4361],{},[23,4356,4357],{},"加 description 到具体例子",[110,4359,4360],{},"\"description\": \"查城市天气。例：city='上海' → 返回 {temp, condition}\"","。模型对例子比对类型描述敏感。",[46,4363,4364,4367,4368,4371],{},[23,4365,4366],{},"检查 strict mode","——如果用了 ",[110,4369,4370],{},"strict: true"," 但 schema 写得不严（少 required、字段类型 union），模型会被卡住一直生成不合规的输出。",[46,4373,4374,4377],{},[23,4375,4376],{},"看模型有没有\"幻觉调用\"","——明明没给的工具名也敢叫。这是 prompt 里历史轮里残留了不存在的工具描述，清掉。",[14,4379,733],{"id":733},[43,4381,4382,4387,4393],{},[46,4383,1454,4384,4386],{},[31,4385,34],{"href":33},"——把工具描述从应用搬到 Server",[46,4388,4389,4390,4392],{},"Agent 视角：",[31,4391,749],{"href":748},"——FC 是 Agent 闭环里的「Action」环节",[46,4394,4395,4396],{},"控制随机性：",[31,4397,4352],{"href":4351},[760,4399,4400],{},"html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}",{"title":108,"searchDepth":140,"depth":140,"links":4402},[4403,4404,4410,4411,4412,4413,4414,4419,4425,4426],{"id":3463,"depth":123,"text":3464},{"id":93,"depth":123,"text":93,"children":4405},[4406,4407,4408,4409],{"id":3487,"depth":140,"text":3488},{"id":3612,"depth":140,"text":3613},{"id":3684,"depth":140,"text":3685},{"id":3720,"depth":140,"text":3721},{"id":3727,"depth":123,"text":3728},{"id":3797,"depth":123,"text":3798},{"id":3866,"depth":123,"text":3867},{"id":3890,"depth":123,"text":3891},{"id":3998,"depth":123,"text":3998,"children":4415},[4416,4417,4418],{"id":4001,"depth":140,"text":4002},{"id":4167,"depth":140,"text":1193},{"id":4184,"depth":140,"text":4184},{"id":1403,"depth":123,"text":1403,"children":4420},[4421,4422,4423,4424],{"id":4200,"depth":140,"text":4201},{"id":4233,"depth":140,"text":4234},{"id":4266,"depth":140,"text":4267},{"id":4306,"depth":140,"text":4307},{"id":4324,"depth":123,"text":4324},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Ffunction-calling",[1502,4430],"gpt-4o",[2062,786,787],{"title":3458,"description":108},"function-calling","wiki\u002Ffunction-calling","让大模型根据用户意图自动选择并调用外部函数\u002FAPI 的能力。是 AI Agent 的基础能力，让模型从对话进化到行动。",[757,429,795],"X1q1z61ut0hH0lDleq4y8k2zCCTHyDL9Tqs2Wrq4nMg",{"id":4439,"title":4440,"body":4441,"category":1498,"description":108,"extension":780,"meta":5052,"navigation":310,"path":5053,"published":783,"relatedModels":5054,"relatedTools":784,"seo":5055,"slug":5056,"stem":5057,"summary":5058,"tags":5059,"updated":783,"__hash__":5061},"wiki\u002Fwiki\u002Fhallucination.md","Hallucination（幻觉）",{"type":11,"value":4442,"toc":5024},[4443,4446,4453,4479,4482,4486,4489,4499,4502,4540,4543,4547,4553,4557,4563,4567,4573,4577,4583,4586,4589,4671,4678,4681,4683,4686,4692,4699,4710,4715,4719,4726,4732,4735,4739,4742,4748,4751,4755,4758,4764,4768,4774,4778,4781,4785,4792,4796,4802,4805,4882,4886,4889,4895,4898,4966,4972,4975,4978,5001,5003],[14,4444,4445],{"id":4445},"什么是幻觉",[19,4447,4448,4449,4452],{},"幻觉（Hallucination）是指大模型生成的内容",[23,4450,4451],{},"看起来正确但实际上是错误的","。包括：",[43,4454,4455,4461,4467,4473],{},[46,4456,4457,4460],{},[23,4458,4459],{},"事实性幻觉"," — 编造不存在的事实（\"鲁迅于 1950 年获得诺贝尔文学奖\"）",[46,4462,4463,4466],{},[23,4464,4465],{},"来源性幻觉"," — 虚构引用来源（编造不存在的论文\u002FURL）",[46,4468,4469,4472],{},[23,4470,4471],{},"能力性幻觉"," — 声称能做做不到的事（\"我可以访问互联网\"）",[46,4474,4475,4478],{},[23,4476,4477],{},"代码幻觉"," — 调用不存在的 API\u002F函数\u002F库",[14,4480,4481],{"id":4481},"为什么会产生幻觉",[95,4483,4485],{"id":4484},"根本原因概率生成","根本原因：概率生成",[19,4487,4488],{},"大模型的本质是\"预测下一个最可能的 token\"。它不是在检索事实，而是在做概率推理。当训练数据中缺乏确切信息时，模型会根据语言模式生成\"看起来合理\"的内容。",[19,4490,4491,4494,4495,4498],{},[23,4492,4493],{},"关键认知","：幻觉不是 bug，是 LLM 的",[23,4496,4497],{},"架构特征","。没有任何技术能 100% 消除它，只能压低发生率和影响范围。",[95,4500,4501],{"id":4501},"具体原因",[59,4503,4504,4510,4516,4522,4528,4534],{},[46,4505,4506,4509],{},[23,4507,4508],{},"训练数据不足"," — 对某个话题了解不够，靠\"猜\"",[46,4511,4512,4515],{},[23,4513,4514],{},"知识截止"," — 训练数据有截止日期，不知道最新信息",[46,4517,4518,4521],{},[23,4519,4520],{},"过度泛化"," — 把某个领域的模式错误应用到另一个领域",[46,4523,4524,4527],{},[23,4525,4526],{},"指令模糊"," — 用户的问题不够明确，模型自由发挥",[46,4529,4530,4533],{},[23,4531,4532],{},"上下文冲突"," — 上下文中有矛盾信息，模型选择\"自圆其说\"",[46,4535,4536,4539],{},[23,4537,4538],{},"RLHF 的副作用"," — 训练时被奖励\"自信地回答\"，于是不知道也硬答",[14,4541,4542],{"id":4542},"幻觉的典型表现",[95,4544,4546],{"id":4545},"_1-编造-api","1. 编造 API",[103,4548,4551],{"className":4549,"code":4550,"language":220},[218],"用户：FastAPI 怎么做 WebSocket 广播？\n模型：使用 FastAPI 的 broadcast() 方法...  ← 这个方法不存在\n",[110,4552,4550],{"__ignoreMap":108},[95,4554,4556],{"id":4555},"_2-虚构引用","2. 虚构引用",[103,4558,4561],{"className":4559,"code":4560,"language":220},[218],"用户：注意力机制最早是谁提出的？\n模型：根据 Smith et al. (2017) 的论文...  ← 这篇论文不存在\n",[110,4562,4560],{"__ignoreMap":108},[95,4564,4566],{"id":4565},"_3-混淆概念","3. 混淆概念",[103,4568,4571],{"className":4569,"code":4570,"language":220},[218],"用户：MCP 和 A2A 有什么区别？\n模型：MCP 是 Google 提出的...  ← MCP 是 Anthropic 提出的\n",[110,4572,4570],{"__ignoreMap":108},[95,4574,4576],{"id":4575},"_4-数字-单位幻觉","4. 数字 \u002F 单位幻觉",[103,4578,4581],{"className":4579,"code":4580,"language":220},[218],"用户：GPT-4 的训练数据有多少 token？\n模型：GPT-4 的训练数据约 13 万亿 token...  ← 数字凭感觉\n",[110,4582,4580],{"__ignoreMap":108},[14,4584,4585],{"id":4585},"主流幻觉评测基准",[19,4587,4588],{},"知道怎么衡量，才能比较不同方案。常用基准：",[357,4590,4591,4603],{},[360,4592,4593],{},[363,4594,4595,4598,4600],{},[366,4596,4597],{},"基准",[366,4599,1943],{},[366,4601,4602],{},"评测方法",[375,4604,4605,4618,4631,4644,4658],{},[363,4606,4607,4612,4615],{},[380,4608,4609],{},[23,4610,4611],{},"TruthfulQA",[380,4613,4614],{},"模型在易误导问题上的真实性",[380,4616,4617],{},"多选 + 人评，看是否被常见误解带偏",[363,4619,4620,4625,4628],{},[380,4621,4622],{},[23,4623,4624],{},"HaluEval",[380,4626,4627],{},"摘要 \u002F QA \u002F 对话三类场景的幻觉率",[380,4629,4630],{},"生成 vs 标注事实对比",[363,4632,4633,4638,4641],{},[380,4634,4635],{},[23,4636,4637],{},"FActScore",[380,4639,4640],{},"长文事实密度",[380,4642,4643],{},"拆原子事实再 verify",[363,4645,4646,4652,4655],{},[380,4647,4648,4651],{},[23,4649,4650],{},"SimpleQA","（OpenAI）",[380,4653,4654],{},"开放问答的事实准确率",[380,4656,4657],{},"标准答案匹配",[363,4659,4660,4665,4668],{},[380,4661,4662],{},[23,4663,4664],{},"Vectara HHEM",[380,4666,4667],{},"摘要场景幻觉",[380,4669,4670],{},"专门训练的 detector 打分",[19,4672,4673,4674,4677],{},"实务中",[23,4675,4676],{},"不要只看一个基准","。模型 A 在 TruthfulQA 高、SimpleQA 低，说明它\"会拒答易错题但事实知识不丰富\"——这跟你的业务匹配吗？",[14,4679,4680],{"id":4680},"如何缓解幻觉",[95,4682,2569],{"id":2568},[19,4684,4685],{},"最有效的方法。在生成回答前先检索知识库，让模型基于真实文档回答。",[103,4687,4690],{"className":4688,"code":4689,"language":220},[218],"用户提问 → 检索知识库 → 检索到的文档 + 问题 → 模型生成 → 引用来源\n",[110,4691,4689],{"__ignoreMap":108},[19,4693,4694,4695,4698],{},"注意：RAG ",[23,4696,4697],{},"降低而非消除","幻觉。模型仍然可能：",[43,4700,4701,4704,4707],{},[46,4702,4703],{},"忽略检索结果，按自己\"知道的\"答",[46,4705,4706],{},"把检索结果里的信息张冠李戴",[46,4708,4709],{},"检索没命中时强答",[19,4711,4712,4713,347],{},"详见 ",[31,4714,2024],{"href":2023},[95,4716,4718],{"id":4717},"_2-grounded-generation强制引用","2. Grounded Generation：强制引用",[19,4720,4721,4722,4725],{},"在 prompt 里要求",[23,4723,4724],{},"每一句话都标注来源","，没有来源的不能说：",[103,4727,4730],{"className":4728,"code":4729,"language":220},[218],"你必须严格遵守：\n1. 只基于 \u003Ccontext> 内的信息回答\n2. 每个事实陈述后用 [doc-1] [doc-2] 标注来源\n3. 如果 context 里没有，回答\"提供的资料未涵盖这一点\"\n4. 不要补充任何 context 外的\"背景知识\"\n",[110,4731,4729],{"__ignoreMap":108},[19,4733,4734],{},"GPT-5 \u002F Claude Sonnet 4 \u002F Gemini 2.5 对此类指令服从度较高，老模型容易\"指令听了一半\"。",[95,4736,4738],{"id":4737},"_3-verifier-二阶段","3. Verifier 二阶段",[19,4740,4741],{},"让另一个 LLM（甚至同一个 LLM 第二轮）专门检查首轮输出：",[103,4743,4746],{"className":4744,"code":4745,"language":220},[218],"[生成 Agent]            [Verifier Agent]\n回答 +  ─────────────►  逐句核对 context\n引用                    输出：✓ 准确 \u002F ✗ 幻觉句\n                          ↓\n                       有幻觉则要求重写\n",[110,4747,4745],{"__ignoreMap":108},[19,4749,4750],{},"代价：token 翻倍 + 延迟翻倍。但对法律 \u002F 医疗 \u002F 金融场景是必要投入。",[95,4752,4754],{"id":4753},"_4-要求标注不确定性","4. 要求标注不确定性",[19,4756,4757],{},"在 prompt 中要求模型标注信心程度：",[103,4759,4762],{"className":4760,"code":4761,"language":220},[218],"回答时标注你的信心程度：\n[确定] 基于事实的回答\n[推测] 基于推理的推测\n[不确定] 缺乏足够信息\n",[110,4763,4761],{"__ignoreMap":108},[95,4765,4767],{"id":4766},"_5-限定回答范围","5. 限定回答范围",[103,4769,4772],{"className":4770,"code":4771,"language":220},[218],"如果不知道，直接说\"我不知道\"。\n不要编造信息。\n只基于提供的上下文回答。\n",[110,4773,4771],{"__ignoreMap":108},[95,4775,4777],{"id":4776},"_6-交叉验证","6. 交叉验证",[19,4779,4780],{},"对同一问题多次提问（不同温度 \u002F 不同 prompt 变体），比较答案的一致性。不一致的部分大概率含幻觉。",[95,4782,4784],{"id":4783},"_7-使用推理模型","7. 使用推理模型",[19,4786,4787,4788,4791],{},"DeepSeek-R1、GPT-5、Claude Opus 4 thinking 等推理模型在回答前会先\"想一想\"，幻觉率显著低于非推理模型——但",[23,4789,4790],{},"会用更多 token","，按场景权衡。",[95,4793,4795],{"id":4794},"_8-温度调低","8. 温度调低",[19,4797,4798,4799,4801],{},"将 ",[31,4800,4347],{"href":4351}," 设为 0 或 0.1，减少随机性，让模型更\"保守\"。",[14,4803,4804],{"id":4804},"不同模型的幻觉率",[357,4806,4807,4818],{},[360,4808,4809],{},[363,4810,4811,4813,4816],{},[366,4812,2148],{},[366,4814,4815],{},"幻觉率",[366,4817,1215],{},[375,4819,4820,4830,4840,4851,4860,4871],{},[363,4821,4822,4824,4827],{},[380,4823,3928],{},[380,4825,4826],{},"低",[380,4828,4829],{},"安全性设计好，不确定时倾向说不知道",[363,4831,4832,4835,4837],{},[380,4833,4834],{},"GPT-5",[380,4836,4826],{},[380,4838,4839],{},"推理能力强，幻觉少",[363,4841,4842,4845,4848],{},[380,4843,4844],{},"GPT-4o",[380,4846,4847],{},"中",[380,4849,4850],{},"偶尔编造，交叉验证可发现",[363,4852,4853,4855,4857],{},[380,4854,3941],{},[380,4856,4847],{},[380,4858,4859],{},"长上下文下\"中间遗忘\"导致幻觉",[363,4861,4862,4865,4868],{},[380,4863,4864],{},"推理模型（o3 \u002F R1 \u002F Claude thinking）",[380,4866,4867],{},"显著降低",[380,4869,4870],{},"思维链过程会自我校验",[363,4872,4873,4876,4879],{},[380,4874,4875],{},"国产基础模型",[380,4877,4878],{},"中-高",[380,4880,4881],{},"英文场景幻觉率更高、专业术语易错",[14,4883,4885],{"id":4884},"生产环境检测-pipeline","生产环境检测 Pipeline",[19,4887,4888],{},"把\"靠运气\"变成\"系统化防御\"，典型四层：",[103,4890,4893],{"className":4891,"code":4892,"language":220},[218],"用户问 → [1] Pre-Retrieval  → [2] Generation  → [3] Post-Check  → 返回\n                ↓ 检索        ↓ grounded gen     ↓ 自动 verifier\n                              ↓ 强制引用         ↓ 不通过 → 降级回复\n",[110,4894,4892],{"__ignoreMap":108},[19,4896,4897],{},"具体配置：",[357,4899,4900,4912],{},[360,4901,4902],{},[363,4903,4904,4907,4909],{},[366,4905,4906],{},"层",[366,4908,3021],{},[366,4910,4911],{},"工具\u002F手段",[375,4913,4914,4927,4940,4953],{},[363,4915,4916,4921,4924],{},[380,4917,4918],{},[23,4919,4920],{},"Pre-Retrieval",[380,4922,4923],{},"没检索到时直接拒答而非硬猜",[380,4925,4926],{},"retrieval score 阈值、空召回兜底",[363,4928,4929,4934,4937],{},[380,4930,4931],{},[23,4932,4933],{},"Generation",[380,4935,4936],{},"grounded prompt + 强制 citation",[380,4938,4939],{},"system prompt 模板",[363,4941,4942,4947,4950],{},[380,4943,4944],{},[23,4945,4946],{},"Post-Check",[380,4948,4949],{},"句级 verifier 校对 context",[380,4951,4952],{},"LLM-as-judge \u002F Vectara HHEM \u002F 规则匹配",[363,4954,4955,4960,4963],{},[380,4956,4957],{},[23,4958,4959],{},"Logging",[380,4961,4962],{},"记录幻觉案例、归因复盘",[380,4964,4965],{},"把 Post-Check 不通过样本采样到评测集",[19,4967,4968,4971],{},[23,4969,4970],{},"经验值","：加 Post-Check 一般能把幻觉率再压 30-50%，代价是延迟 +20-40%、成本 +50-80%。",[14,4973,4974],{"id":4974},"开发者的幻觉检测清单",[19,4976,4977],{},"在应用中处理模型输出时，检查以下信号：",[59,4979,4980,4983,4986,4989,4992,4995,4998],{},[46,4981,4982],{},"✅ 回答中是否包含可验证的事实？→ 查证",[46,4984,4985],{},"✅ 是否引用了 URL\u002F论文\u002F文档？→ 验证是否存在",[46,4987,4988],{},"✅ 代码是否调用了 API\u002F函数？→ 查文档确认",[46,4990,4991],{},"✅ 回答是否与已知事实矛盾？→ 标记冲突",[46,4993,4994],{},"✅ 模型是否说\"我不知道\"？→ 这是好信号，不要惩罚",[46,4996,4997],{},"✅ 数字 \u002F 日期 \u002F 单位是否合理？→ 数量级粗算",[46,4999,5000],{},"✅ 多次重复提问，结果是否稳定？→ 答案飘忽 = 高幻觉风险",[14,5002,733],{"id":733},[43,5004,5005,5010,5014,5018],{},[46,5006,5007,5008],{},"缓解方案：",[31,5009,2782],{"href":2023},[46,5011,4395,5012],{},[31,5013,4352],{"href":4351},[46,5015,2785,5016],{},[31,5017,1473],{"href":1472},[46,5019,5020,5021],{},"行为微调：",[31,5022,2843],{"href":5023},"\u002Fwiki\u002Ffine-tuning-vs-rag.html",{"title":108,"searchDepth":140,"depth":140,"links":5025},[5026,5027,5031,5037,5038,5048,5049,5050,5051],{"id":4445,"depth":123,"text":4445},{"id":4481,"depth":123,"text":4481,"children":5028},[5029,5030],{"id":4484,"depth":140,"text":4485},{"id":4501,"depth":140,"text":4501},{"id":4542,"depth":123,"text":4542,"children":5032},[5033,5034,5035,5036],{"id":4545,"depth":140,"text":4546},{"id":4555,"depth":140,"text":4556},{"id":4565,"depth":140,"text":4566},{"id":4575,"depth":140,"text":4576},{"id":4585,"depth":123,"text":4585},{"id":4680,"depth":123,"text":4680,"children":5039},[5040,5041,5042,5043,5044,5045,5046,5047],{"id":2568,"depth":140,"text":2569},{"id":4717,"depth":140,"text":4718},{"id":4737,"depth":140,"text":4738},{"id":4753,"depth":140,"text":4754},{"id":4766,"depth":140,"text":4767},{"id":4776,"depth":140,"text":4777},{"id":4783,"depth":140,"text":4784},{"id":4794,"depth":140,"text":4795},{"id":4804,"depth":123,"text":4804},{"id":4884,"depth":123,"text":4885},{"id":4974,"depth":123,"text":4974},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Fhallucination",[1502,1503],{"title":4440,"description":108},"hallucination","wiki\u002Fhallucination","大模型生成看似合理但事实上错误或虚构的内容。幻觉是 LLM 最大的可靠性挑战，无法完全消除，但可以通过多种方法缓解。",[5060,1375,2024],"幻觉","F8svYQF3vhbhnhHvCWK75hp0TltZ7PhOCBdUT-W_a7k",{"id":5063,"title":3402,"body":5064,"category":2057,"description":108,"extension":780,"meta":6100,"navigation":310,"path":6101,"published":783,"relatedModels":6102,"relatedTools":784,"seo":6105,"slug":6106,"stem":6107,"summary":6108,"tags":6109,"updated":783,"__hash__":6112},"wiki\u002Fwiki\u002Flora.md",{"type":11,"value":5065,"toc":6070},[5066,5070,5076,5079,5081,5084,5087,5098,5102,5105,5111,5114,5122,5127,5130,5134,5182,5186,5189,5193,5196,5207,5211,5214,5218,5221,5317,5322,5325,5329,5332,5336,5339,5371,5375,5378,5429,5433,5436,5505,5512,5516,5519,5574,5577,5581,5584,5663,5669,5673,5676,5682,5689,5700,5703,5706,5710,5713,5821,5825,5959,5963,5966,5970,6045,6047,6067],[14,5067,5069],{"id":5068},"什么是-lora","什么是 LoRA",[19,5071,5072,5073,347],{},"LoRA（Low-Rank Adaptation）是一种参数高效微调方法。核心思想：",[23,5074,5075],{},"不修改原始模型权重，而是在旁边加一个很小的\"适配器\"矩阵来学习任务特定知识",[19,5077,5078],{},"打个比方：原模型是一本教科书（不能改写），LoRA 是你在书页边写的笔记——不改原书，但补充了特定场景的知识。",[14,5080,2362],{"id":2362},[95,5082,5083],{"id":5083},"传统微调",[19,5085,5086],{},"全量微调要更新模型所有参数。一个 70B 模型有 700 亿参数，全部更新需要：",[43,5088,5089,5092,5095],{},[46,5090,5091],{},"700 亿参数的梯度计算",[46,5093,5094],{},"700 亿参数的优化器状态",[46,5096,5097],{},"至少 8×A100 GPU",[95,5099,5101],{"id":5100},"lora-微调","LoRA 微调",[19,5103,5104],{},"LoRA 的数学原理：大模型权重矩阵 W 的更新可以用两个小矩阵 A × B 来近似：",[103,5106,5109],{"className":5107,"code":5108,"language":220},[218],"W' = W + A × B\n\nW: 原始权重 (d×d)，冻结不动\nA: 降维矩阵 (d×r)，需要训练\nB: 升维矩阵 (r×d)，需要训练\nr: 秩（rank），通常 8-64\n",[110,5110,5108],{"__ignoreMap":108},[19,5112,5113],{},"当 r=8, d=4096 时：",[43,5115,5116,5119],{},[46,5117,5118],{},"原始参数：4096×4096 = 1678 万",[46,5120,5121],{},"LoRA 参数：4096×8 + 8×4096 = 6.6 万",[19,5123,5124,347],{},[23,5125,5126],{},"训练参数减少 99.6%",[14,5128,5129],{"id":5129},"优势",[95,5131,5133],{"id":5132},"_1-显存大幅降低","1. 显存大幅降低",[357,5135,5136,5148],{},[360,5137,5138],{},[363,5139,5140,5142,5145],{},[366,5141,2297],{},[366,5143,5144],{},"70B 模型显存",[366,5146,5147],{},"GPU 需求",[375,5149,5150,5161,5171],{},[363,5151,5152,5155,5158],{},[380,5153,5154],{},"全量微调",[380,5156,5157],{},"~500GB",[380,5159,5160],{},"8×A100 80G",[363,5162,5163,5165,5168],{},[380,5164,5101],{},[380,5166,5167],{},"~80GB",[380,5169,5170],{},"1×A100 80G",[363,5172,5173,5176,5179],{},[380,5174,5175],{},"QLoRA（量化+LoRA）",[380,5177,5178],{},"~24GB",[380,5180,5181],{},"1×RTX 4090",[95,5183,5185],{"id":5184},"_2-训练速度快","2. 训练速度快",[19,5187,5188],{},"参数少，梯度计算和优化器更新都更快。通常比全量微调快 2-3 倍。",[95,5190,5192],{"id":5191},"_3-可插拔","3. 可插拔",[19,5194,5195],{},"LoRA 适配器是一个独立的小文件（几十 MB），可以：",[43,5197,5198,5201,5204],{},[46,5199,5200],{},"随时加载\u002F卸载",[46,5202,5203],{},"多个 LoRA 切换使用",[46,5205,5206],{},"不同用户用不同 LoRA",[95,5208,5210],{"id":5209},"_4-不破坏原模型","4. 不破坏原模型",[19,5212,5213],{},"原模型权重不变，可以随时回到原始状态。多个 LoRA 可以叠加。",[14,5215,5217],{"id":5216},"lora-家族lora-qlora-dora-adalora","LoRA 家族：LoRA \u002F QLoRA \u002F DoRA \u002F AdaLoRA",[19,5219,5220],{},"LoRA 出来后衍生了一堆变体，按需选用：",[357,5222,5223,5237],{},[360,5224,5225],{},[363,5226,5227,5229,5232,5235],{},[366,5228,2297],{},[366,5230,5231],{},"核心改动",[366,5233,5234],{},"相对 LoRA 的变化",[366,5236,2438],{},[375,5238,5239,5253,5269,5285,5301],{},[363,5240,5241,5245,5248,5250],{},[380,5242,5243],{},[23,5244,3085],{},[380,5246,5247],{},"基础版",[380,5249,2465],{},[380,5251,5252],{},"通用、入门首选",[363,5254,5255,5260,5263,5266],{},[380,5256,5257],{},[23,5258,5259],{},"QLoRA",[380,5261,5262],{},"把原模型量化到 4bit 再加 LoRA",[380,5264,5265],{},"显存 -70%，速度略慢",[380,5267,5268],{},"消费级 GPU",[363,5270,5271,5276,5279,5282],{},[380,5272,5273],{},[23,5274,5275],{},"DoRA",[380,5277,5278],{},"分解为「方向 + 大小」分别学",[380,5280,5281],{},"同参数量下效果接近全量微调",[380,5283,5284],{},"追求效果上限",[363,5286,5287,5292,5295,5298],{},[380,5288,5289],{},[23,5290,5291],{},"AdaLoRA",[380,5293,5294],{},"训练中动态调整每层的 rank",[380,5296,5297],{},"自动找最优 rank 分配",[380,5299,5300],{},"不想手调 rank",[363,5302,5303,5308,5311,5314],{},[380,5304,5305],{},[23,5306,5307],{},"VeRA",[380,5309,5310],{},"共享随机矩阵 + 缩放向量",[380,5312,5313],{},"参数量再降 10x",[380,5315,5316],{},"多 LoRA 大量部署",[19,5318,5319,5321],{},[23,5320,3991],{},"：80% 场景 LoRA 就够，显存不够上 QLoRA。DoRA 适合 LoRA 效果不到位但又不想全量微调的中间地带。",[14,5323,5324],{"id":5324},"劣势",[95,5326,5328],{"id":5327},"_1-效果略逊全量微调","1. 效果略逊全量微调",[19,5330,5331],{},"LoRA 在大多数任务上接近全量微调，但在需要大幅改变模型行为的场景（如全新语言学习）可能不如全量。",[95,5333,5335],{"id":5334},"_2-需要调参","2. 需要调参",[19,5337,5338],{},"LoRA 的关键参数：",[43,5340,5341,5347,5353,5359,5365],{},[46,5342,5343,5346],{},[23,5344,5345],{},"r（秩）","：8-64，越大效果越好但参数越多",[46,5348,5349,5352],{},[23,5350,5351],{},"alpha","：缩放因子，通常设为 r 的 2 倍",[46,5354,5355,5358],{},[23,5356,5357],{},"target_modules","：对哪些层加 LoRA（通常选 q_proj, v_proj）",[46,5360,5361,5364],{},[23,5362,5363],{},"dropout","：0.0-0.1，防过拟合",[46,5366,5367,5370],{},[23,5368,5369],{},"学习率","：1e-4 到 5e-4（比全量微调高一个数量级）",[95,5372,5374],{"id":5373},"_3-推理时需要决定合不合并","3. 推理时需要决定合不合并",[19,5376,5377],{},"LoRA 推理时有两种姿势，各有取舍：",[357,5379,5380,5394],{},[360,5381,5382],{},[363,5383,5384,5386,5389,5392],{},[366,5385,3260],{},[366,5387,5388],{},"推理性能",[366,5390,5391],{},"灵活性",[366,5393,2438],{},[375,5395,5396,5413],{},[363,5397,5398,5404,5407,5410],{},[380,5399,5400,5403],{},[23,5401,5402],{},"合并到原模型","（merge）",[380,5405,5406],{},"与原模型完全一致，零开销",[380,5408,5409],{},"死板，换 LoRA 要重新合并、重新部署",[380,5411,5412],{},"单一固定 LoRA、生产稳定环境",[363,5414,5415,5420,5423,5426],{},[380,5416,5417],{},[23,5418,5419],{},"运行时加载",[380,5421,5422],{},"多一次矩阵加法，延迟 +5-15%",[380,5424,5425],{},"同进程切换多个 LoRA、A\u002FB 测试",[380,5427,5428],{},"多租户 \u002F 多版本场景",[14,5430,5432],{"id":5431},"target_modules-怎么选","target_modules 怎么选",[19,5434,5435],{},"这是 LoRA 调参里最玄学也最关键的一项。常见组合：",[357,5437,5438,5453],{},[360,5439,5440],{},[363,5441,5442,5445,5448,5450],{},[366,5443,5444],{},"组合",[366,5446,5447],{},"训练参数",[366,5449,1853],{},[366,5451,5452],{},"何时用",[375,5454,5455,5471,5487],{},[363,5456,5457,5463,5466,5468],{},[380,5458,5459,5460],{},"只 ",[110,5461,5462],{},"q_proj, v_proj",[380,5464,5465],{},"最少",[380,5467,2957],{},[380,5469,5470],{},"显存极紧 \u002F 快速实验",[363,5472,5473,5479,5481,5484],{},[380,5474,5475,5478],{},[110,5476,5477],{},"q_proj, k_proj, v_proj, o_proj","（全注意力）",[380,5480,4847],{},[380,5482,5483],{},"较好",[380,5485,5486],{},"通用推荐起点",[363,5488,5489,5496,5499,5502],{},[380,5490,5491,5492,5495],{},"全注意力 + MLP（",[110,5493,5494],{},"gate_proj, up_proj, down_proj","）",[380,5497,5498],{},"多",[380,5500,5501],{},"接近全量微调",[380,5503,5504],{},"任务复杂、显存够",[19,5506,5507,5508,5511],{},"LLaMA-Factory \u002F PEFT 都允许 ",[110,5509,5510],{},"target_modules=\"all-linear\""," 一把梭，对小模型（\u003C 13B）通常直接用这个最省心。",[14,5513,5515],{"id":5514},"qlora4bit-量化-lora","QLoRA：4bit 量化 + LoRA",[19,5517,5518],{},"QLoRA 是 LoRA 的进一步优化：把原模型量化到 4bit 再做 LoRA 微调。",[357,5520,5521,5532],{},[360,5522,5523],{},[363,5524,5525,5528,5530],{},[366,5526,5527],{},"指标",[366,5529,3085],{},[366,5531,5259],{},[375,5533,5534,5544,5554,5564],{},[363,5535,5536,5538,5541],{},[380,5537,5144],{},[380,5539,5540],{},"80GB",[380,5542,5543],{},"24GB",[363,5545,5546,5548,5551],{},[380,5547,5147],{},[380,5549,5550],{},"A100",[380,5552,5553],{},"RTX 4090",[363,5555,5556,5559,5561],{},[380,5557,5558],{},"效果损失",[380,5560,1024],{},[380,5562,5563],{},"\u003C1%",[363,5565,5566,5569,5571],{},[380,5567,5568],{},"训练速度",[380,5570,4597],{},[380,5572,5573],{},"慢 20%",[19,5575,5576],{},"QLoRA 让在消费级 GPU（RTX 4090\u002F3090）上微调 70B 模型成为可能。",[14,5578,5580],{"id":5579},"推理服务的-lora-支持","推理服务的 LoRA 支持",[19,5582,5583],{},"把 LoRA 跑到生产，关键看推理框架支不支持多 LoRA 动态切换：",[357,5585,5586,5599],{},[360,5587,5588],{},[363,5589,5590,5593,5596],{},[366,5591,5592],{},"框架",[366,5594,5595],{},"LoRA 支持",[366,5597,5598],{},"多 LoRA 热加载",[375,5600,5601,5613,5626,5638,5651],{},[363,5602,5603,5608,5610],{},[380,5604,5605],{},[23,5606,5607],{},"vLLM",[380,5609,2968],{},[380,5611,5612],{},"✅ 一份基础模型 + 多个 LoRA 同时服务",[363,5614,5615,5621,5623],{},[380,5616,5617,5620],{},[23,5618,5619],{},"TGI","（HuggingFace）",[380,5622,2968],{},[380,5624,5625],{},"✅ 支持运行时切换",[363,5627,5628,5633,5635],{},[380,5629,5630],{},[23,5631,5632],{},"TensorRT-LLM",[380,5634,2968],{},[380,5636,5637],{},"⚠️ 需要预编译",[363,5639,5640,5645,5648],{},[380,5641,5642],{},[23,5643,5644],{},"llama.cpp",[380,5646,5647],{},"✅ GGUF + LoRA",[380,5649,5650],{},"⚠️ 通常建议合并后再用",[363,5652,5653,5658,5661],{},[380,5654,5655],{},[23,5656,5657],{},"Ollama",[380,5659,5660],{},"⚠️ 通常合并",[380,5662,2971],{},[19,5664,5665,5668],{},[23,5666,5667],{},"真正的杀手锏场景","：vLLM 的多 LoRA 部署。1 张 80G A100 可以同时服务 1 个 70B 基础模型 + 几十个 LoRA Adapter，不同用户请求路由到不同 LoRA。SaaS 化 fine-tune 服务（Together、Modal 等）背后基本都是这套。",[14,5670,5672],{"id":5671},"多-adapter-切换的实战案例","多 Adapter 切换的实战案例",[19,5674,5675],{},"一个客服系统服务多个品牌客户：",[103,5677,5680],{"className":5678,"code":5679,"language":220},[218],"共享基础模型: Qwen-2.5-7B\n  ├─ adapter_brand_A.safetensors (40MB)   ← A 品牌话术 \u002F 知识\n  ├─ adapter_brand_B.safetensors (40MB)   ← B 品牌话术 \u002F 知识\n  └─ adapter_brand_C.safetensors (40MB)\n",[110,5681,5679],{"__ignoreMap":108},[19,5683,5684,5685,5688],{},"请求时按 ",[110,5686,5687],{},"brand"," 字段路由到对应 LoRA。优势：",[43,5690,5691,5694,5697],{},[46,5692,5693],{},"7B 基础模型只占一份 GPU 显存",[46,5695,5696],{},"新增品牌只要训练一个 40MB 的 LoRA，几小时上线",[46,5698,5699],{},"任一品牌出问题，回滚单个 LoRA 即可，不影响其他",[19,5701,5702],{},"如果用全量微调，要么每个品牌一份 14GB 模型权重（贵），要么混在一起训（容易互相干扰）。",[14,5704,5705],{"id":5705},"实操工具",[95,5707,5709],{"id":5708},"llama-factory","LLaMA-Factory",[19,5711,5712],{},"开箱即用的 LoRA 微调工具：",[103,5714,5718],{"className":5715,"code":5716,"language":5717,"meta":108,"style":108},"language-bash shiki shiki-themes github-light github-dark","# 安装\npip install llama-factory\n\n# Web UI 启动\nllamafactory-cli webui\n\n# 命令行微调 Qwen\nllamafactory-cli train \\\n  --model_name_or_path Qwen\u002FQwen2.5-7B \\\n  --finetuning_type lora \\\n  --lora_rank 8 \\\n  --dataset your_dataset \\\n  --output_dir .\u002Foutput\n","bash",[110,5719,5720,5725,5737,5741,5746,5754,5758,5763,5773,5783,5793,5803,5813],{"__ignoreMap":108},[113,5721,5722],{"class":115,"line":116},[113,5723,5724],{"class":3716},"# 安装\n",[113,5726,5727,5731,5734],{"class":115,"line":123},[113,5728,5730],{"class":5729},"sScJk","pip",[113,5732,5733],{"class":133}," install",[113,5735,5736],{"class":133}," llama-factory\n",[113,5738,5739],{"class":115,"line":140},[113,5740,311],{"emptyLinePlaceholder":310},[113,5742,5743],{"class":115,"line":153},[113,5744,5745],{"class":3716},"# Web UI 启动\n",[113,5747,5748,5751],{"class":115,"line":174},[113,5749,5750],{"class":5729},"llamafactory-cli",[113,5752,5753],{"class":133}," webui\n",[113,5755,5756],{"class":115,"line":187},[113,5757,311],{"emptyLinePlaceholder":310},[113,5759,5760],{"class":115,"line":5},[113,5761,5762],{"class":3716},"# 命令行微调 Qwen\n",[113,5764,5765,5767,5770],{"class":115,"line":272},[113,5766,5750],{"class":5729},[113,5768,5769],{"class":133}," train",[113,5771,5772],{"class":126}," \\\n",[113,5774,5775,5778,5781],{"class":115,"line":278},[113,5776,5777],{"class":126},"  --model_name_or_path",[113,5779,5780],{"class":133}," Qwen\u002FQwen2.5-7B",[113,5782,5772],{"class":126},[113,5784,5785,5788,5791],{"class":115,"line":284},[113,5786,5787],{"class":126},"  --finetuning_type",[113,5789,5790],{"class":133}," lora",[113,5792,5772],{"class":126},[113,5794,5795,5798,5801],{"class":115,"line":290},[113,5796,5797],{"class":126},"  --lora_rank",[113,5799,5800],{"class":126}," 8",[113,5802,5772],{"class":126},[113,5804,5805,5808,5811],{"class":115,"line":296},[113,5806,5807],{"class":126},"  --dataset",[113,5809,5810],{"class":133}," your_dataset",[113,5812,5772],{"class":126},[113,5814,5815,5818],{"class":115,"line":302},[113,5816,5817],{"class":126},"  --output_dir",[113,5819,5820],{"class":133}," .\u002Foutput\n",[95,5822,5824],{"id":5823},"peft-hugging-face","PEFT (Hugging Face)",[103,5826,5828],{"className":3691,"code":5827,"language":3693,"meta":108,"style":108},"from peft import LoraConfig, get_peft_model\nfrom transformers import AutoModelForCausalLM\n\nmodel = AutoModelForCausalLM.from_pretrained(\"Qwen\u002FQwen2.5-7B\")\nconfig = LoraConfig(\n    r=8, lora_alpha=16,\n    target_modules=[\"q_proj\", \"v_proj\"],\n    lora_dropout=0.05,\n)\nmodel = get_peft_model(model, config)\n# 训练...\n",[110,5829,5830,5844,5856,5860,5876,5886,5909,5929,5941,5945,5954],{"__ignoreMap":108},[113,5831,5832,5835,5838,5841],{"class":115,"line":116},[113,5833,5834],{"class":3703},"from",[113,5836,5837],{"class":119}," peft ",[113,5839,5840],{"class":3703},"import",[113,5842,5843],{"class":119}," LoraConfig, get_peft_model\n",[113,5845,5846,5848,5851,5853],{"class":115,"line":123},[113,5847,5834],{"class":3703},[113,5849,5850],{"class":119}," transformers ",[113,5852,5840],{"class":3703},[113,5854,5855],{"class":119}," AutoModelForCausalLM\n",[113,5857,5858],{"class":115,"line":140},[113,5859,311],{"emptyLinePlaceholder":310},[113,5861,5862,5865,5867,5870,5873],{"class":115,"line":153},[113,5863,5864],{"class":119},"model ",[113,5866,3704],{"class":3703},[113,5868,5869],{"class":119}," AutoModelForCausalLM.from_pretrained(",[113,5871,5872],{"class":133},"\"Qwen\u002FQwen2.5-7B\"",[113,5874,5875],{"class":119},")\n",[113,5877,5878,5881,5883],{"class":115,"line":174},[113,5879,5880],{"class":119},"config ",[113,5882,3704],{"class":3703},[113,5884,5885],{"class":119}," LoraConfig(\n",[113,5887,5888,5892,5894,5897,5899,5902,5904,5907],{"class":115,"line":187},[113,5889,5891],{"class":5890},"s4XuR","    r",[113,5893,3704],{"class":3703},[113,5895,5896],{"class":126},"8",[113,5898,165],{"class":119},[113,5900,5901],{"class":5890},"lora_alpha",[113,5903,3704],{"class":3703},[113,5905,5906],{"class":126},"16",[113,5908,137],{"class":119},[113,5910,5911,5914,5916,5919,5922,5924,5927],{"class":115,"line":5},[113,5912,5913],{"class":5890},"    target_modules",[113,5915,3704],{"class":3703},[113,5917,5918],{"class":119},"[",[113,5920,5921],{"class":133},"\"q_proj\"",[113,5923,165],{"class":119},[113,5925,5926],{"class":133},"\"v_proj\"",[113,5928,171],{"class":119},[113,5930,5931,5934,5936,5939],{"class":115,"line":272},[113,5932,5933],{"class":5890},"    lora_dropout",[113,5935,3704],{"class":3703},[113,5937,5938],{"class":126},"0.05",[113,5940,137],{"class":119},[113,5942,5943],{"class":115,"line":278},[113,5944,5875],{"class":119},[113,5946,5947,5949,5951],{"class":115,"line":284},[113,5948,5864],{"class":119},[113,5950,3704],{"class":3703},[113,5952,5953],{"class":119}," get_peft_model(model, config)\n",[113,5955,5956],{"class":115,"line":290},[113,5957,5958],{"class":3716},"# 训练...\n",[95,5960,5962],{"id":5961},"axolotl","Axolotl",[19,5964,5965],{},"YAML 驱动的训练框架，社区配方丰富，适合追新（DoRA \u002F 新优化器先到）。",[14,5967,5969],{"id":5968},"什么时候用-lora","什么时候用 LoRA",[357,5971,5972,5982],{},[360,5973,5974],{},[363,5975,5976,5979],{},[366,5977,5978],{},"场景",[366,5980,5981],{},"推荐",[375,5983,5984,5992,6000,6008,6015,6022,6029,6037],{},[363,5985,5986,5989],{},[380,5987,5988],{},"风格\u002F格式调整",[380,5990,5991],{},"✅ LoRA",[363,5993,5994,5997],{},[380,5995,5996],{},"领域知识注入",[380,5998,5999],{},"✅ LoRA + RAG",[363,6001,6002,6005],{},[380,6003,6004],{},"多租户定制",[380,6006,6007],{},"✅ LoRA（多 Adapter 部署）",[363,6009,6010,6013],{},[380,6011,6012],{},"快速实验",[380,6014,5991],{},[363,6016,6017,6019],{},[380,6018,5268],{},[380,6020,6021],{},"✅ QLoRA",[363,6023,6024,6027],{},[380,6025,6026],{},"全新语言学习",[380,6028,5154],{},[363,6030,6031,6034],{},[380,6032,6033],{},"安全对齐",[380,6035,6036],{},"全量微调 \u002F DPO",[363,6038,6039,6042],{},[380,6040,6041],{},"闭源模型微调",[380,6043,6044],{},"用厂商 fine-tuning API",[14,6046,733],{"id":733},[43,6048,6049,6054,6061],{},[46,6050,6051,6052],{},"路线选型：",[31,6053,2843],{"href":5023},[46,6055,6056,6057,562,6059],{},"数据准备前：",[31,6058,2028],{"href":2027},[31,6060,2024],{"href":2023},[46,6062,6063,6064,6066],{},"模型部署：",[31,6065,2016],{"href":2015}," 计费视角",[760,6068,6069],{},"html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}",{"title":108,"searchDepth":140,"depth":140,"links":6071},[6072,6073,6077,6083,6084,6089,6090,6091,6092,6093,6098,6099],{"id":5068,"depth":123,"text":5069},{"id":2362,"depth":123,"text":2362,"children":6074},[6075,6076],{"id":5083,"depth":140,"text":5083},{"id":5100,"depth":140,"text":5101},{"id":5129,"depth":123,"text":5129,"children":6078},[6079,6080,6081,6082],{"id":5132,"depth":140,"text":5133},{"id":5184,"depth":140,"text":5185},{"id":5191,"depth":140,"text":5192},{"id":5209,"depth":140,"text":5210},{"id":5216,"depth":123,"text":5217},{"id":5324,"depth":123,"text":5324,"children":6085},[6086,6087,6088],{"id":5327,"depth":140,"text":5328},{"id":5334,"depth":140,"text":5335},{"id":5373,"depth":140,"text":5374},{"id":5431,"depth":123,"text":5432},{"id":5514,"depth":123,"text":5515},{"id":5579,"depth":123,"text":5580},{"id":5671,"depth":123,"text":5672},{"id":5705,"depth":123,"text":5705,"children":6094},[6095,6096,6097],{"id":5708,"depth":140,"text":5709},{"id":5823,"depth":140,"text":5824},{"id":5961,"depth":140,"text":5962},{"id":5968,"depth":123,"text":5969},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Flora",[6103,6104],"llama-4","qwen-3",{"title":3402,"description":108},"lora","wiki\u002Flora","Low-Rank Adaptation，一种高效微调方法：冻结原模型权重，只训练一个很小的低秩矩阵，大幅降低微调成本。",[3085,6110,6111],"微调","高效训练","-wdobqimIFHvkroMMQaVjM1ZIEzmf08L_2FwF_Qkkrc",{"id":6114,"title":6115,"body":6116,"category":779,"description":108,"extension":780,"meta":6967,"navigation":310,"path":6968,"published":783,"relatedModels":6969,"relatedTools":6970,"seo":6975,"slug":6976,"stem":6977,"summary":6978,"tags":6979,"updated":783,"__hash__":6980},"wiki\u002Fwiki\u002Fmcp.md","MCP (Model Context Protocol)",{"type":11,"value":6117,"toc":6947},[6118,6122,6125,6128,6130,6133,6144,6147,6150,6155,6157,6160,6166,6169,6172,6192,6195,6209,6212,6215,6315,6329,6331,6335,6338,6349,6356,6425,6429,6436,6504,6507,6510,6575,6579,6669,6682,6686,6753,6759,6761,6825,6829,6832,6861,6863,6866,6910,6922,6924,6944],[14,6119,6121],{"id":6120},"什么是-mcp","什么是 MCP",[19,6123,6124],{},"MCP（Model Context Protocol）是 Anthropic 于 2024 年 11 月发布的开放协议，用于标准化 AI 模型与外部工具、数据源、API 之间的连接方式。",[19,6126,6127],{},"打个比方：MCP 之于 AI 模型，就像 USB-C 之于电子设备——一个统一接口，连什么都行。",[14,6129,38],{"id":38},[19,6131,6132],{},"在 MCP 出现之前，每个 AI 工具要连接外部系统都需要单独开发集成：",[43,6134,6135,6138,6141],{},[46,6136,6137],{},"Cursor 想连数据库 → 写一套数据库适配",[46,6139,6140],{},"Claude Code 想连 GitHub → 写一套 GitHub 适配",[46,6142,6143],{},"Copilot 想连 Jira → 写一套 Jira 适配",[19,6145,6146],{},"结果是 N 个工具 × M 个数据源 = N×M 套适配代码。",[19,6148,6149],{},"MCP 把这变成 N+M：每个工具实现一次 MCP 客户端，每个数据源实现一次 MCP Server，就可以互相连接。",[86,6151,6152],{},[19,6153,6154],{},"这是「协议分层」的常见受益模式：HTTP 让任意浏览器能访问任意网站，LSP 让任意编辑器能支持任意语言。MCP 在做的是同一件事，只不过对象从「网页 \u002F 语言」换成「工具 \u002F 数据源」。",[14,6156,93],{"id":93},[19,6158,6159],{},"MCP 采用 Client-Server 架构：",[103,6161,6164],{"className":6162,"code":6163,"language":220},[218],"AI 工具（Cursor \u002F Claude Code）\n    ↕ MCP 协议（JSON-RPC 2.0）\nMCP Server（文件系统 \u002F 数据库 \u002F API）\n    ↕\n实际数据源\n",[110,6165,6163],{"__ignoreMap":108},[95,6167,6168],{"id":6168},"三种能力",[19,6170,6171],{},"每个 MCP Server 可以暴露三种能力：",[59,6173,6174,6180,6186],{},[46,6175,6176,6179],{},[23,6177,6178],{},"Tools（工具）"," — 模型可以调用的函数（查数据库、发邮件、读文件）",[46,6181,6182,6185],{},[23,6183,6184],{},"Resources（资源）"," — 模型可以读取的数据（文件内容、API 响应）",[46,6187,6188,6191],{},[23,6189,6190],{},"Prompts（提示词模板）"," — 预定义的提示词模板（代码审查模板、文档生成模板）",[95,6193,6194],{"id":6194},"传输方式",[43,6196,6197,6203],{},[46,6198,6199,6202],{},[23,6200,6201],{},"stdio"," — 本地进程通信（Claude Code 连本地 MCP Server，最常见）",[46,6204,6205,6208],{},[23,6206,6207],{},"SSE \u002F HTTP"," — 远程网络通信（Cursor 连远程托管的 MCP Server）",[95,6210,6211],{"id":6211},"消息格式",[19,6213,6214],{},"底层是 JSON-RPC 2.0，对调试很友好。一次「列出可用工具」的握手大致是这样：",[103,6216,6218],{"className":231,"code":6217,"language":233,"meta":108,"style":108},"\u002F\u002F Client → Server\n{ \"jsonrpc\": \"2.0\", \"id\": 1, \"method\": \"tools\u002Flist\" }\n\n\u002F\u002F Server → Client\n{\n  \"jsonrpc\": \"2.0\", \"id\": 1,\n  \"result\": {\n    \"tools\": [\n      {\n        \"name\": \"query_db\",\n        \"description\": \"Run a read-only SQL query\",\n        \"inputSchema\": {\n          \"type\": \"object\",\n          \"properties\": { \"sql\": { \"type\": \"string\" } },\n          \"required\": [\"sql\"]\n        }\n      }\n    ]\n  }\n}\n",[110,6219,6220,6225,6230,6234,6239,6243,6247,6252,6257,6262,6267,6272,6277,6282,6287,6292,6297,6301,6306,6310],{"__ignoreMap":108},[113,6221,6222],{"class":115,"line":116},[113,6223,6224],{},"\u002F\u002F Client → Server\n",[113,6226,6227],{"class":115,"line":123},[113,6228,6229],{},"{ \"jsonrpc\": \"2.0\", \"id\": 1, \"method\": \"tools\u002Flist\" }\n",[113,6231,6232],{"class":115,"line":140},[113,6233,311],{"emptyLinePlaceholder":310},[113,6235,6236],{"class":115,"line":153},[113,6237,6238],{},"\u002F\u002F Server → Client\n",[113,6240,6241],{"class":115,"line":174},[113,6242,120],{},[113,6244,6245],{"class":115,"line":187},[113,6246,254],{},[113,6248,6249],{"class":115,"line":5},[113,6250,6251],{},"  \"result\": {\n",[113,6253,6254],{"class":115,"line":272},[113,6255,6256],{},"    \"tools\": [\n",[113,6258,6259],{"class":115,"line":278},[113,6260,6261],{},"      {\n",[113,6263,6264],{"class":115,"line":284},[113,6265,6266],{},"        \"name\": \"query_db\",\n",[113,6268,6269],{"class":115,"line":290},[113,6270,6271],{},"        \"description\": \"Run a read-only SQL query\",\n",[113,6273,6274],{"class":115,"line":296},[113,6275,6276],{},"        \"inputSchema\": {\n",[113,6278,6279],{"class":115,"line":302},[113,6280,6281],{},"          \"type\": \"object\",\n",[113,6283,6284],{"class":115,"line":307},[113,6285,6286],{},"          \"properties\": { \"sql\": { \"type\": \"string\" } },\n",[113,6288,6289],{"class":115,"line":314},[113,6290,6291],{},"          \"required\": [\"sql\"]\n",[113,6293,6294],{"class":115,"line":320},[113,6295,6296],{},"        }\n",[113,6298,6299],{"class":115,"line":326},[113,6300,3583],{},[113,6302,6303],{"class":115,"line":331},[113,6304,6305],{},"    ]\n",[113,6307,6308],{"class":115,"line":337},[113,6309,299],{},[113,6311,6313],{"class":115,"line":6312},20,[113,6314,200],{},[19,6316,6317,6318,6321,6322,6325,6326,6328],{},"随后模型决定调用某个工具，Client 再发 ",[110,6319,6320],{},"tools\u002Fcall","，Server 返回执行结果。",[110,6323,6324],{},"inputSchema"," 是 JSON Schema，模型据此知道参数怎么传——这套机制和 ",[31,6327,903],{"href":756}," 同源，但把「函数定义」从「应用内硬编码」搬到了「Server 自我描述」。",[14,6330,3998],{"id":3998},[95,6332,6334],{"id":6333},"cursor-mcp","Cursor + MCP",[19,6336,6337],{},"Cursor 支持 MCP 后，可以：",[43,6339,6340,6343,6346],{},[46,6341,6342],{},"直接读取本地数据库 schema",[46,6344,6345],{},"调用外部 API 获取实时数据",[46,6347,6348],{},"读取项目文档和设计稿",[19,6350,6351,6352,6355],{},"配置示例（",[110,6353,6354],{},".cursor\u002Fmcp.json","）：",[103,6357,6359],{"className":105,"code":6358,"language":107,"meta":108,"style":108},"{\n  \"servers\": {\n    \"postgres\": {\n      \"command\": \"npx\",\n      \"args\": [\"-y\", \"@modelcontextprotocol\u002Fserver-postgres\", \"postgresql:\u002F\u002Flocalhost\u002Fmydb\"]\n    }\n  }\n}\n",[110,6360,6361,6365,6372,6379,6391,6413,6417,6421],{"__ignoreMap":108},[113,6362,6363],{"class":115,"line":116},[113,6364,120],{"class":119},[113,6366,6367,6370],{"class":115,"line":123},[113,6368,6369],{"class":126},"  \"servers\"",[113,6371,3530],{"class":119},[113,6373,6374,6377],{"class":115,"line":140},[113,6375,6376],{"class":126},"    \"postgres\"",[113,6378,3530],{"class":119},[113,6380,6381,6384,6386,6389],{"class":115,"line":153},[113,6382,6383],{"class":126},"      \"command\"",[113,6385,130],{"class":119},[113,6387,6388],{"class":133},"\"npx\"",[113,6390,137],{"class":119},[113,6392,6393,6396,6398,6401,6403,6406,6408,6411],{"class":115,"line":174},[113,6394,6395],{"class":126},"      \"args\"",[113,6397,159],{"class":119},[113,6399,6400],{"class":133},"\"-y\"",[113,6402,165],{"class":119},[113,6404,6405],{"class":133},"\"@modelcontextprotocol\u002Fserver-postgres\"",[113,6407,165],{"class":119},[113,6409,6410],{"class":133},"\"postgresql:\u002F\u002Flocalhost\u002Fmydb\"",[113,6412,3601],{"class":119},[113,6414,6415],{"class":115,"line":187},[113,6416,293],{"class":119},[113,6418,6419],{"class":115,"line":5},[113,6420,299],{"class":119},[113,6422,6423],{"class":115,"line":272},[113,6424,200],{"class":119},[95,6426,6428],{"id":6427},"claude-code-mcp","Claude Code + MCP",[19,6430,6431,6432,6435],{},"Claude Code 原生支持 MCP，通过 ",[110,6433,6434],{},"claude mcp add"," 命令添加 Server：",[103,6437,6439],{"className":5715,"code":6438,"language":5717,"meta":108,"style":108},"# 添加文件系统 MCP Server\nclaude mcp add filesystem -- npx -y @modelcontextprotocol\u002Fserver-filesystem \u002Fpath\u002Fto\u002Fproject\n\n# 添加 GitHub MCP Server\nclaude mcp add github -- npx -y @modelcontextprotocol\u002Fserver-github\n",[110,6440,6441,6446,6475,6479,6484],{"__ignoreMap":108},[113,6442,6443],{"class":115,"line":116},[113,6444,6445],{"class":3716},"# 添加文件系统 MCP Server\n",[113,6447,6448,6451,6454,6457,6460,6463,6466,6469,6472],{"class":115,"line":123},[113,6449,6450],{"class":5729},"claude",[113,6452,6453],{"class":133}," mcp",[113,6455,6456],{"class":133}," add",[113,6458,6459],{"class":133}," filesystem",[113,6461,6462],{"class":126}," --",[113,6464,6465],{"class":133}," npx",[113,6467,6468],{"class":126}," -y",[113,6470,6471],{"class":133}," @modelcontextprotocol\u002Fserver-filesystem",[113,6473,6474],{"class":133}," \u002Fpath\u002Fto\u002Fproject\n",[113,6476,6477],{"class":115,"line":140},[113,6478,311],{"emptyLinePlaceholder":310},[113,6480,6481],{"class":115,"line":153},[113,6482,6483],{"class":3716},"# 添加 GitHub MCP Server\n",[113,6485,6486,6488,6490,6492,6495,6497,6499,6501],{"class":115,"line":174},[113,6487,6450],{"class":5729},[113,6489,6453],{"class":133},[113,6491,6456],{"class":133},[113,6493,6494],{"class":133}," github",[113,6496,6462],{"class":126},[113,6498,6465],{"class":133},[113,6500,6468],{"class":126},[113,6502,6503],{"class":133}," @modelcontextprotocol\u002Fserver-github\n",[95,6505,6506],{"id":6506},"五分钟上手清单",[19,6508,6509],{},"第一次接入，建议按这个顺序，少踩坑：",[59,6511,6512,6532,6550,6556,6562],{},[46,6513,6514,558,6517,6520,6521,6524,6525,562,6528,6531],{},[23,6515,6516],{},"挑一个无副作用的 Server 试水",[110,6518,6519],{},"filesystem","（只读模式）或 ",[110,6522,6523],{},"sqlite","（指向本地测试库）。不要拿 ",[110,6526,6527],{},"github",[110,6529,6530],{},"postgres"," 写库这种带破坏力的当 Hello World。",[46,6533,6534,6537,6538,6541,6542,6545,6546,6549],{},[23,6535,6536],{},"在客户端里确认握手成功","——Cursor 设置页 \u002F Claude Code ",[110,6539,6540],{},"claude mcp list"," 应能看到工具列表。看不到通常是 ",[110,6543,6544],{},"command"," 路径或 ",[110,6547,6548],{},"args"," 写错。",[46,6551,6552,6555],{},[23,6553,6554],{},"手动让模型调一次","——例如「列出当前目录所有 .md 文件」，验证 stdio 通路工作。",[46,6557,6558,6561],{},[23,6559,6560],{},"加一个真实场景的 Server","——比如你常用的笔记\u002F数据库\u002FJira。",[46,6563,6564,6567,6568,6574],{},[23,6565,6566],{},"再考虑写自己的 Server","——SDK 见 ",[31,6569,6573],{"href":6570,"rel":6571},"https:\u002F\u002Fgithub.com\u002Fmodelcontextprotocol",[6572],"nofollow","官方仓库","，Python \u002F TypeScript 都有，最小骨架不超过 50 行。",[14,6576,6578],{"id":6577},"常见-mcp-server","常见 MCP Server",[357,6580,6581,6591],{},[360,6582,6583],{},[363,6584,6585,6588],{},[366,6586,6587],{},"Server",[366,6589,6590],{},"功能",[375,6592,6593,6600,6607,6614,6621,6629,6637,6645,6653,6661],{},[363,6594,6595,6597],{},[380,6596,6519],{},[380,6598,6599],{},"读写本地文件",[363,6601,6602,6604],{},[380,6603,6530],{},[380,6605,6606],{},"查询 PostgreSQL 数据库",[363,6608,6609,6611],{},[380,6610,6523],{},[380,6612,6613],{},"查询 SQLite 数据库",[363,6615,6616,6618],{},[380,6617,6527],{},[380,6619,6620],{},"操作 GitHub（PR\u002FIssue\u002F搜索）",[363,6622,6623,6626],{},[380,6624,6625],{},"gitlab",[380,6627,6628],{},"操作 GitLab",[363,6630,6631,6634],{},[380,6632,6633],{},"slack",[380,6635,6636],{},"发送 Slack 消息",[363,6638,6639,6642],{},[380,6640,6641],{},"google-drive",[380,6643,6644],{},"读取 Google Drive 文件",[363,6646,6647,6650],{},[380,6648,6649],{},"puppeteer",[380,6651,6652],{},"浏览器自动化",[363,6654,6655,6658],{},[380,6656,6657],{},"memory",[380,6659,6660],{},"知识图谱持久记忆",[363,6662,6663,6666],{},[380,6664,6665],{},"sequential-thinking",[380,6667,6668],{},"结构化推理",[19,6670,6671,6672,6677,6678,347],{},"完整列表见 ",[31,6673,6676],{"href":6674,"rel":6675},"https:\u002F\u002Fmodelcontextprotocol.io\u002Fservers",[6572],"modelcontextprotocol.io\u002Fservers","；社区聚合也可看 ",[31,6679,6681],{"href":6680},"\u002Fagent\u002Fprotocol\u002Fsmithery.html","Smithery",[14,6683,6685],{"id":6684},"与-function-calling-的区别","与 function calling 的区别",[357,6687,6688,6698],{},[360,6689,6690],{},[363,6691,6692,6694,6696],{},[366,6693,368],{},[366,6695,903],{},[366,6697,34],{},[375,6699,6700,6711,6720,6731,6742],{},[363,6701,6702,6705,6708],{},[380,6703,6704],{},"定义方式",[380,6706,6707],{},"每个工具硬编码",[380,6709,6710],{},"标准化协议",[363,6712,6713,6715,6717],{},[380,6714,3887],{},[380,6716,2971],{},[380,6718,6719],{},"✅ 一次开发，处处可用",[363,6721,6722,6725,6728],{},[380,6723,6724],{},"运行时发现",[380,6726,6727],{},"❌ 预定义",[380,6729,6730],{},"✅ 动态发现 Server 能力",[363,6732,6733,6736,6739],{},[380,6734,6735],{},"生态",[380,6737,6738],{},"各自为政",[380,6740,6741],{},"统一社区",[363,6743,6744,6747,6750],{},[380,6745,6746],{},"底层机制",[380,6748,6749],{},"模型 API 直传 schema",[380,6751,6752],{},"仍是 function calling，但 schema 来自 Server",[19,6754,6755,6756,6758],{},"简而言之：MCP 没有取代 ",[31,6757,903],{"href":756},"，而是给「函数定义从哪儿来」加了一层标准化。模型那一端的协议（拿到 schema → 决定调哪个 → 传参 → 收结果）没变。",[14,6760,588],{"id":588},[43,6762,6763,6779,6792,6801,6815],{},[46,6764,6765,6768,6769,6772,6773,562,6776,347],{},[23,6766,6767],{},"stdio Server 进程没退干净","——客户端崩溃时子进程会变孤儿，下次启动连不上端口。Linux\u002FmacOS 用 ",[110,6770,6771],{},"ps aux | grep mcp"," 清掉；Windows 任务管理器搜 ",[110,6774,6775],{},"npx",[110,6777,6778],{},"node",[46,6780,6781,6787,6788,6791],{},[23,6782,6783,6786],{},[110,6784,6785],{},"npx -y"," 首次拉包慢，握手超时","——预先在终端跑一次 ",[110,6789,6790],{},"npx -y @modelcontextprotocol\u002Fserver-foo --help"," 把缓存暖好。",[46,6793,6794,558,6797,6800],{},[23,6795,6796],{},"路径含空格 \u002F 中文",[110,6798,6799],{},"server-filesystem"," 早期版本对 Windows 路径处理粗糙，建议用全英文路径或链接。",[46,6802,6803,558,6806,6809,6810,6814],{},[23,6804,6805],{},"凭据明文写在 mcp.json",[110,6807,6808],{},"mcp.json"," 经常被提交到 git，里面别直接写 DB 密码 \u002F API Key。用环境变量或 ",[31,6811,6813],{"href":6812},"\u002Fagent\u002Fprotocol\u002Fcomposio.html","Composio"," 这类托管 Auth 的中间层。",[46,6816,6817,6820,6821,347],{},[23,6818,6819],{},"生产环境装社区 Server","——MCP Server 拿到的是模型可调用权限，恶意 Server 可以让模型「自愿」泄漏数据。社区 Server 上生产前必须审代码，参考 ",[31,6822,6824],{"href":6823},"\u002Fnews\u002F2026\u002Fmcp-1-0-release.html","MCP 1.0 发布日的观点",[14,6826,6828],{"id":6827},"什么场景不该用-mcp","什么场景不该用 MCP",[19,6830,6831],{},"MCP 不是万能胶水，下面这些场景上 MCP 是给自己加复杂度：",[43,6833,6834,6840,6846,6852],{},[46,6835,6836,6839],{},[23,6837,6838],{},"只在一个应用内调用，永不复用","——直接 function calling 更轻。",[46,6841,6842,6845],{},[23,6843,6844],{},"延迟敏感的同步调用","——MCP 多了一层 stdio\u002FHTTP 跳转，纳秒级场景别套。",[46,6847,6848,6851],{},[23,6849,6850],{},"大流量数据搬运","——MCP 通信适合「指令 + 元数据」，搬几百 MB 文件应该走旁路（让模型拿到下载链接，而不是把字节流塞进 MCP 消息）。",[46,6853,6854,6857,6858,6860],{},[23,6855,6856],{},"多 Agent 互相协作","——这是 ",[31,6859,373],{"href":1460}," 的目标，不是 MCP。一句话区分：MCP 是 Agent ↔ 工具，A2A 是 Agent ↔ Agent。",[14,6862,709],{"id":709},[19,6864,6865],{},"截至 2026 年中，MCP 已被以下工具支持：",[43,6867,6868,6878,6887,6892,6898,6904],{},[46,6869,6870,6877],{},[23,6871,6872,6873],{},"Claude Code \u002F ",[31,6874,6876],{"href":6875},"\u002Fagent\u002Fdesktop\u002Fclaude-desktop.html","Claude Desktop","（Anthropic）— 原生",[46,6879,6880,6886],{},[23,6881,6882],{},[31,6883,6885],{"href":6884},"\u002Fcoding\u002Fide\u002Fcursor.html","Cursor","（AI IDE）— 原生",[46,6888,6889,6886],{},[23,6890,6891],{},"Windsurf",[46,6893,6894,6897],{},[23,6895,6896],{},"Cline \u002F Roo Code","（CLI）— 原生",[46,6899,6900,6903],{},[23,6901,6902],{},"Coze \u002F Dify","（Agent 平台）— 部分支持",[46,6905,6906,6909],{},[23,6907,6908],{},"VS Code Copilot"," — 已支持",[19,6911,6912,6913,633,6915,6919,6920,347],{},"社区已有 200+ 个开源 MCP Server，覆盖数据库、云服务、开发工具、办公软件等场景。聚合与一键安装可看 ",[31,6914,6681],{"href":6680},[31,6916,6918],{"href":6917},"\u002Fagent\u002Fprotocol\u002Fmcp-toolbox.html","MCP Toolbox","，权限\u002FAuth 托管可看 ",[31,6921,6813],{"href":6812},[14,6923,733],{"id":733},[43,6925,6926,6933,6938],{},[46,6927,6928,6929,6932],{},"协议层面对比：",[31,6930,6931],{"href":1460},"A2A（Agent-to-Agent Protocol）","——Agent ↔ Agent 通信",[46,6934,745,6935,6937],{},[31,6936,757],{"href":756},"——模型如何决定调用哪个工具",[46,6939,6940,6941],{},"行业事件：",[31,6942,6943],{"href":6823},"Anthropic 推出 MCP 协议 1.0",[760,6945,6946],{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}",{"title":108,"searchDepth":140,"depth":140,"links":6948},[6949,6950,6951,6956,6961,6962,6963,6964,6965,6966],{"id":6120,"depth":123,"text":6121},{"id":38,"depth":123,"text":38},{"id":93,"depth":123,"text":93,"children":6952},[6953,6954,6955],{"id":6168,"depth":140,"text":6168},{"id":6194,"depth":140,"text":6194},{"id":6211,"depth":140,"text":6211},{"id":3998,"depth":123,"text":3998,"children":6957},[6958,6959,6960],{"id":6333,"depth":140,"text":6334},{"id":6427,"depth":140,"text":6428},{"id":6506,"depth":140,"text":6506},{"id":6577,"depth":123,"text":6578},{"id":6684,"depth":123,"text":6685},{"id":588,"depth":123,"text":588},{"id":6827,"depth":123,"text":6828},{"id":709,"depth":123,"text":709},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Fmcp",[1502],[6971,6972,6973,2062,6974],"agent\u002Fprotocol\u002Fsmithery","agent\u002Fprotocol\u002Fmcp-toolbox","agent\u002Fprotocol\u002Fcomposio","agent\u002Fdesktop\u002Fclaude-desktop",{"title":6115,"description":108},"mcp","wiki\u002Fmcp","Anthropic 推出的开放协议，让 AI 模型标准化地连接外部工具、数据源和 API，类似于 AI 的 USB-C 接口。",[34,794,429,396],"xEz90j9YjHTavwY1Bx1iwQkejQrEGbROZvSV_zNo_J0",{"id":6982,"title":6983,"body":6984,"category":2057,"description":108,"extension":780,"meta":7710,"navigation":310,"path":7711,"published":783,"relatedModels":7712,"relatedTools":7713,"seo":7714,"slug":7715,"stem":7716,"summary":7717,"tags":7718,"updated":783,"__hash__":7721},"wiki\u002Fwiki\u002Fprompt-engineering.md","Prompt Engineering（提示词工程）",{"type":11,"value":6985,"toc":7672},[6986,6990,6993,6999,7002,7006,7009,7015,7019,7022,7028,7032,7035,7041,7044,7048,7051,7057,7067,7071,7074,7080,7084,7087,7093,7097,7100,7143,7202,7205,7238,7245,7249,7252,7407,7410,7428,7432,7438,7444,7447,7450,7461,7464,7468,7471,7475,7481,7485,7488,7492,7495,7501,7505,7508,7512,7522,7526,7529,7535,7538,7542,7545,7549,7556,7561,7565,7569,7572,7598,7602,7608,7610,7614,7617,7621,7624,7628,7631,7635,7638,7642,7645,7647,7670],[14,6987,6989],{"id":6988},"什么是-prompt-engineering","什么是 Prompt Engineering",[19,6991,6992],{},"Prompt Engineering 是设计和优化 LLM 输入提示词的技术，目标是让模型生成更准确、更有用的输出。",[19,6994,6995,6996,347],{},"它不是\"哄模型\"，而是",[23,6997,6998],{},"用结构化的方式精确传达你的意图",[14,7000,7001],{"id":7001},"核心技巧",[95,7003,7005],{"id":7004},"_1-角色设定","1. 角色设定",[19,7007,7008],{},"给模型一个明确的角色，让它知道用什么视角回答。",[103,7010,7013],{"className":7011,"code":7012,"language":220},[218],"你是一个资深 Python 后端工程师，擅长 FastAPI 和 PostgreSQL。\n请审查以下代码的安全性和性能问题。\n",[110,7014,7012],{"__ignoreMap":108},[95,7016,7018],{"id":7017},"_2-few-shot-示例","2. Few-shot 示例",[19,7020,7021],{},"给模型几个输入输出范例，让它学会你期望的模式。",[103,7023,7026],{"className":7024,"code":7025,"language":220},[218],"输入：这个函数太慢了\n输出：该函数时间复杂度为 O(n²)，建议改为哈希表查找，降至 O(n)。\n\n输入：这里会内存泄漏\n输出：该代码创建了事件监听器但未在组件销毁时移除，导致内存无法回收。\n\n输入：{{用户输入}}\n输出：\n",[110,7027,7025],{"__ignoreMap":108},[95,7029,7031],{"id":7030},"_3-chain-of-thought-cot","3. Chain-of-Thought (CoT)",[19,7033,7034],{},"让模型\"想一想再回答\"，显著提高推理准确率。",[103,7036,7039],{"className":7037,"code":7038,"language":220},[218],"请逐步分析以下问题，先写出推理过程，再给出最终答案。\n\n问题：一个水池有两个进水管，A 管 3 小时注满，B 管 5 小时注满，同时开几小时注满？\n",[110,7040,7038],{"__ignoreMap":108},[19,7042,7043],{},"对于 Claude Sonnet 4 \u002F GPT-5 等推理模型，不需要显式要求 CoT——它们内置了思维链能力。",[95,7045,7047],{"id":7046},"_4-结构化输出","4. 结构化输出",[19,7049,7050],{},"要求模型用特定格式输出，方便程序解析。",[103,7052,7055],{"className":7053,"code":7054,"language":220},[218],"请以 JSON 格式输出，包含以下字段：\n{\n  \"bug_found\": true\u002Ffalse,\n  \"severity\": \"high\u002Fmedium\u002Flow\",\n  \"description\": \"问题描述\",\n  \"fix_suggestion\": \"修复建议\"\n}\n",[110,7056,7054],{"__ignoreMap":108},[19,7058,7059,7060,7063,7064,7066],{},"生产场景下不要靠 prompt\"求\"模型出 JSON——用 ",[31,7061,7062],{"href":756},"Structured Outputs","（OpenAI）、Tool Use（Anthropic）或 ",[110,7065,3857],{},"（Gemini）做硬约束。",[95,7068,7070],{"id":7069},"_5-约束条件","5. 约束条件",[19,7072,7073],{},"明确告诉模型什么该做、什么不该做。",[103,7075,7078],{"className":7076,"code":7077,"language":220},[218],"规则：\n1. 只审查安全相关的问题，不评论代码风格\n2. 如果没有安全问题，明确说\"未发现安全问题\"\n3. 每个问题必须给出具体的代码行号\n4. 不要给出模糊的建议如\"注意安全\"\n",[110,7079,7077],{"__ignoreMap":108},[95,7081,7083],{"id":7082},"_6-分步指令","6. 分步指令",[19,7085,7086],{},"复杂任务拆成明确步骤。",[103,7088,7091],{"className":7089,"code":7090,"language":220},[218],"请按以下步骤执行：\n1. 读取 src\u002Fauth.ts 文件\n2. 找出所有 SQL 查询\n3. 检查是否使用了参数化查询\n4. 列出有 SQL 注入风险的查询\n5. 给出修复建议\n",[110,7092,7090],{"__ignoreMap":108},[14,7094,7096],{"id":7095},"xml-vs-markdown-结构化","XML vs Markdown 结构化",[19,7098,7099],{},"主流两种 prompt 结构化方式，按模型挑：",[103,7101,7103],{"className":1594,"code":7102,"language":1596,"meta":108,"style":108},"\u003C!-- XML 风格（Anthropic 官方推荐）-->\n\u003Ccontext>\n  \u003Cfile path=\"src\u002Fauth.ts\">...\u003C\u002Ffile>\n\u003C\u002Fcontext>\n\u003Ctask>Review for security issues.\u003C\u002Ftask>\n\u003Crules>\n  \u003Crule>Only report security findings.\u003C\u002Frule>\n\u003C\u002Frules>\n",[110,7104,7105,7110,7114,7119,7123,7128,7133,7138],{"__ignoreMap":108},[113,7106,7107],{"class":115,"line":116},[113,7108,7109],{},"\u003C!-- XML 风格（Anthropic 官方推荐）-->\n",[113,7111,7112],{"class":115,"line":123},[113,7113,1603],{},[113,7115,7116],{"class":115,"line":140},[113,7117,7118],{},"  \u003Cfile path=\"src\u002Fauth.ts\">...\u003C\u002Ffile>\n",[113,7120,7121],{"class":115,"line":153},[113,7122,1636],{},[113,7124,7125],{"class":115,"line":174},[113,7126,7127],{},"\u003Ctask>Review for security issues.\u003C\u002Ftask>\n",[113,7129,7130],{"class":115,"line":187},[113,7131,7132],{},"\u003Crules>\n",[113,7134,7135],{"class":115,"line":5},[113,7136,7137],{},"  \u003Crule>Only report security findings.\u003C\u002Frule>\n",[113,7139,7140],{"class":115,"line":272},[113,7141,7142],{},"\u003C\u002Frules>\n",[103,7144,7148],{"className":7145,"code":7146,"language":7147,"meta":108,"style":108},"language-markdown shiki shiki-themes github-light github-dark","\u003C!-- Markdown 风格（OpenAI 偏好）-->\n## Context\n\nFile: src\u002Fauth.ts\n[content]\n\n## Task\nReview for security issues.\n\n## Rules\n- Only report security findings.\n","markdown",[110,7149,7150,7155,7160,7164,7169,7174,7178,7183,7188,7192,7197],{"__ignoreMap":108},[113,7151,7152],{"class":115,"line":116},[113,7153,7154],{},"\u003C!-- Markdown 风格（OpenAI 偏好）-->\n",[113,7156,7157],{"class":115,"line":123},[113,7158,7159],{},"## Context\n",[113,7161,7162],{"class":115,"line":140},[113,7163,311],{"emptyLinePlaceholder":310},[113,7165,7166],{"class":115,"line":153},[113,7167,7168],{},"File: src\u002Fauth.ts\n",[113,7170,7171],{"class":115,"line":174},[113,7172,7173],{},"[content]\n",[113,7175,7176],{"class":115,"line":187},[113,7177,311],{"emptyLinePlaceholder":310},[113,7179,7180],{"class":115,"line":5},[113,7181,7182],{},"## Task\n",[113,7184,7185],{"class":115,"line":272},[113,7186,7187],{},"Review for security issues.\n",[113,7189,7190],{"class":115,"line":278},[113,7191,311],{"emptyLinePlaceholder":310},[113,7193,7194],{"class":115,"line":284},[113,7195,7196],{},"## Rules\n",[113,7198,7199],{"class":115,"line":290},[113,7200,7201],{},"- Only report security findings.\n",[19,7203,7204],{},"经验：",[43,7206,7207,7213,7226,7232],{},[46,7208,7209,7212],{},[23,7210,7211],{},"Claude 系列","：XML 标签明显更稳定，错读概率低",[46,7214,7215,7218,7219,562,7222,7225],{},[23,7216,7217],{},"GPT 系列","：Markdown 标题更自然，对 ",[110,7220,7221],{},"##",[110,7223,7224],{},"###"," 层级敏感",[46,7227,7228,7231],{},[23,7229,7230],{},"Gemini","：两者差不多",[46,7233,7234,7237],{},[23,7235,7236],{},"国产模型","：建议 Markdown，部分模型对 XML 的训练数据少",[19,7239,7240,7241,7244],{},"混搭也行，但",[23,7242,7243],{},"一个 prompt 内挑一种贯彻到底","，不要 XML 和 Markdown 交替。",[14,7246,7248],{"id":7247},"anthropic-openai-官方模板风格","Anthropic \u002F OpenAI 官方模板风格",[19,7250,7251],{},"工业级 prompt 通常长这样（Anthropic 风格示例）：",[103,7253,7255],{"className":1594,"code":7254,"language":1596,"meta":108,"style":108},"\u003Crole>\nYou are a senior security engineer reviewing pull requests.\n\u003C\u002Frole>\n\n\u003Cinstructions>\n1. Read the diff in \u003Cdiff> below\n2. Identify security vulnerabilities\n3. Output findings in the format specified in \u003Coutput_format>\n\u003C\u002Finstructions>\n\n\u003Coutput_format>\nFor each finding:\n- severity: critical | high | medium | low\n- file: \u003Cpath>\n- line: \u003Cnumber>\n- issue: \u003Cone sentence>\n- fix: \u003Ccode or text>\n\u003C\u002Foutput_format>\n\n\u003Cexamples>\n\u003Cexample>\nInput: ... diff with SQL injection ...\nOutput: { \"severity\": \"critical\", \"file\": \"auth.py\", ... }\n\u003C\u002Fexample>\n\u003C\u002Fexamples>\n\n\u003Cdiff>\n{{actual diff}}\n\u003C\u002Fdiff>\n",[110,7256,7257,7262,7267,7272,7276,7281,7286,7291,7296,7301,7305,7310,7315,7320,7325,7330,7335,7340,7345,7349,7354,7360,7366,7372,7378,7384,7389,7395,7401],{"__ignoreMap":108},[113,7258,7259],{"class":115,"line":116},[113,7260,7261],{},"\u003Crole>\n",[113,7263,7264],{"class":115,"line":123},[113,7265,7266],{},"You are a senior security engineer reviewing pull requests.\n",[113,7268,7269],{"class":115,"line":140},[113,7270,7271],{},"\u003C\u002Frole>\n",[113,7273,7274],{"class":115,"line":153},[113,7275,311],{"emptyLinePlaceholder":310},[113,7277,7278],{"class":115,"line":174},[113,7279,7280],{},"\u003Cinstructions>\n",[113,7282,7283],{"class":115,"line":187},[113,7284,7285],{},"1. Read the diff in \u003Cdiff> below\n",[113,7287,7288],{"class":115,"line":5},[113,7289,7290],{},"2. Identify security vulnerabilities\n",[113,7292,7293],{"class":115,"line":272},[113,7294,7295],{},"3. Output findings in the format specified in \u003Coutput_format>\n",[113,7297,7298],{"class":115,"line":278},[113,7299,7300],{},"\u003C\u002Finstructions>\n",[113,7302,7303],{"class":115,"line":284},[113,7304,311],{"emptyLinePlaceholder":310},[113,7306,7307],{"class":115,"line":290},[113,7308,7309],{},"\u003Coutput_format>\n",[113,7311,7312],{"class":115,"line":296},[113,7313,7314],{},"For each finding:\n",[113,7316,7317],{"class":115,"line":302},[113,7318,7319],{},"- severity: critical | high | medium | low\n",[113,7321,7322],{"class":115,"line":307},[113,7323,7324],{},"- file: \u003Cpath>\n",[113,7326,7327],{"class":115,"line":314},[113,7328,7329],{},"- line: \u003Cnumber>\n",[113,7331,7332],{"class":115,"line":320},[113,7333,7334],{},"- issue: \u003Cone sentence>\n",[113,7336,7337],{"class":115,"line":326},[113,7338,7339],{},"- fix: \u003Ccode or text>\n",[113,7341,7342],{"class":115,"line":331},[113,7343,7344],{},"\u003C\u002Foutput_format>\n",[113,7346,7347],{"class":115,"line":337},[113,7348,311],{"emptyLinePlaceholder":310},[113,7350,7351],{"class":115,"line":6312},[113,7352,7353],{},"\u003Cexamples>\n",[113,7355,7357],{"class":115,"line":7356},21,[113,7358,7359],{},"\u003Cexample>\n",[113,7361,7363],{"class":115,"line":7362},22,[113,7364,7365],{},"Input: ... diff with SQL injection ...\n",[113,7367,7369],{"class":115,"line":7368},23,[113,7370,7371],{},"Output: { \"severity\": \"critical\", \"file\": \"auth.py\", ... }\n",[113,7373,7375],{"class":115,"line":7374},24,[113,7376,7377],{},"\u003C\u002Fexample>\n",[113,7379,7381],{"class":115,"line":7380},25,[113,7382,7383],{},"\u003C\u002Fexamples>\n",[113,7385,7387],{"class":115,"line":7386},26,[113,7388,311],{"emptyLinePlaceholder":310},[113,7390,7392],{"class":115,"line":7391},27,[113,7393,7394],{},"\u003Cdiff>\n",[113,7396,7398],{"class":115,"line":7397},28,[113,7399,7400],{},"{{actual diff}}\n",[113,7402,7404],{"class":115,"line":7403},29,[113,7405,7406],{},"\u003C\u002Fdiff>\n",[19,7408,7409],{},"要点：",[43,7411,7412,7418,7425],{},[46,7413,7414,7415],{},"角色、指令、输出格式、示例、动态内容",[23,7416,7417],{},"各占独立标签",[46,7419,7420,7421,7424],{},"动态内容（用户输入、检索结果）",[23,7422,7423],{},"放最后","——既配合 prompt cache，又减少注入风险",[46,7426,7427],{},"Few-shot 数量 1-5 个最佳，过多反而稀释指令权重",[14,7429,7431],{"id":7430},"推理模型的-prompt-写法不一样","推理模型的 prompt 写法不一样",[19,7433,7434,7435,2524],{},"GPT-5、Claude Opus 4 thinking、DeepSeek-R1 等推理模型",[23,7436,7437],{},"不应再写 CoT 指令",[103,7439,7442],{"className":7440,"code":7441,"language":220},[218],"❌ 老写法（对 GPT-4o 有效）\n\"请一步一步思考，先写出推理过程...\"\n\n✅ 推理模型新写法\n直接说目标和约束，不要教它怎么想\n",[110,7443,7441],{"__ignoreMap":108},[19,7445,7446],{},"原因：推理模型内置了思维链，再加 CoT 反而会让它在\"展示思考\"上花更多 token、却没有更聪明。给清楚目标和验收标准即可。",[19,7448,7449],{},"类似的：",[43,7451,7452,7455,7458],{},[46,7453,7454],{},"推理模型不需要 \"take a deep breath\" \u002F \"you are an expert\" 这类老套激励",[46,7456,7457],{},"Few-shot 仍然有效，但作用从\"示范怎么想\"变成\"约束输出格式\"",[46,7459,7460],{},"温度建议保持默认（通常 1.0），别强行调 0",[14,7462,7463],{"id":7463},"高级技巧",[95,7465,7467],{"id":7466},"self-consistency","Self-Consistency",[19,7469,7470],{},"让模型多次回答同一个问题，取多数结果。适用于数学\u002F推理题。",[95,7472,7474],{"id":7473},"react","ReAct",[19,7476,7477,7478,7480],{},"Thought → Action → Observation 循环，",[31,7479,795],{"href":748}," 的基础模式。",[95,7482,7484],{"id":7483},"tree-of-thoughts","Tree of Thoughts",[19,7486,7487],{},"让模型探索多条推理路径，选最优的。适合复杂决策。",[95,7489,7491],{"id":7490},"prompt-chaining","Prompt Chaining",[19,7493,7494],{},"把一个复杂任务拆成多个 prompt 串联：",[103,7496,7499],{"className":7497,"code":7498,"language":220},[218],"Prompt 1: 提取文章关键信息 → 输出 JSON\nPrompt 2: 基于 JSON 生成摘要 → 输出摘要\nPrompt 3: 基于摘要生成社交媒体文案 → 输出文案\n",[110,7500,7498],{"__ignoreMap":108},[14,7502,7504],{"id":7503},"prompt-的工程化当-prompt-变成代码","Prompt 的工程化：当 prompt 变成代码",[19,7506,7507],{},"生产级 prompt 不是字符串拼接，是要进 git 的\"代码\"。需要：",[95,7509,7511],{"id":7510},"_1-版本化-code-review","1. 版本化 + Code Review",[19,7513,7514,7515,562,7518,7521],{},"每个 prompt 一个文件（",[110,7516,7517],{},".md",[110,7519,7520],{},".txt","），改动走 PR review。模板插值用 Jinja2 \u002F Handlebars 而不是 f-string 散落各处。",[95,7523,7525],{"id":7524},"_2-评测集eval-set","2. 评测集（Eval Set）",[19,7527,7528],{},"任何 prompt 改动前必跑回归测试：",[103,7530,7533],{"className":7531,"code":7532,"language":220},[218],"golden_dataset.jsonl\n├─ 100 条典型 case\n│  - input\n│  - expected_output（或 expected_format）\n│  - rubric（评分细则）\n└─ 跑新 prompt → 自动比对 → 通过率不降才合并\n",[110,7534,7532],{"__ignoreMap":108},[19,7536,7537],{},"工具：Promptfoo \u002F LangSmith \u002F Braintrust \u002F 自己写。",[95,7539,7541],{"id":7540},"_3-ab-测试","3. A\u002FB 测试",[19,7543,7544],{},"灰度发布 prompt 变更，看真实用户场景下指标（任务成功率、用户满意度、token 成本）。",[95,7546,7548],{"id":7547},"_4-监控","4. 监控",[19,7550,7551,7552,7555],{},"线上每个调用都记 ",[110,7553,7554],{},"prompt_version + input + output + tokens","，方便定位回归。",[86,7557,7558],{},[19,7559,7560],{},"把 prompt 当代码维护后，\"改一个字模型就崩\"的痛苦会大幅减少。这是从 demo 到生产最关键的工程化跃迁。",[14,7562,7564],{"id":7563},"在-ai-编程中的实践","在 AI 编程中的实践",[95,7566,7568],{"id":7567},"cursor-claude-code","Cursor \u002F Claude Code",[19,7570,7571],{},"这些工具内部用了大量 prompt engineering：",[43,7573,7574,7580,7586,7592],{},[46,7575,7576,7579],{},[23,7577,7578],{},"System Prompt"," — 定义 AI 的角色和行为规范",[46,7581,7582,7585],{},[23,7583,7584],{},"Context Assembly"," — 组装文件、光标位置、对话历史",[46,7587,7588,7591],{},[23,7589,7590],{},"Tool Definitions"," — 定义搜索、编辑、终端等工具",[46,7593,7594,7597],{},[23,7595,7596],{},"Few-shot"," — 展示工具调用的正确格式",[95,7599,7601],{"id":7600},"代码审查-prompt","代码审查 Prompt",[103,7603,7606],{"className":7604,"code":7605,"language":220},[218],"你是一个代码审查专家。请审查以下 Git diff：\n\n{{diff}}\n\n检查以下方面：\n1. 安全漏洞（SQL 注入、XSS、敏感信息泄露）\n2. 性能问题（N+1 查询、内存泄漏、不必要的计算）\n3. 逻辑错误（边界条件、空指针、竞态条件）\n4. 可维护性（命名、复杂度、重复代码）\n\n输出格式：\n- 🔴 严重：[问题描述] (行号)\n- 🟡 建议：[问题描述] (行号)\n- 🟢 良好：[做得好的地方]\n",[110,7607,7605],{"__ignoreMap":108},[14,7609,3355],{"id":3355},[95,7611,7613],{"id":7612},"_1-prompt-越长越好","1. \"Prompt 越长越好\"",[19,7615,7616],{},"不。无关内容会分散模型注意力。每个词都应有存在理由。",[95,7618,7620],{"id":7619},"_2-加你是专家就有用","2. \"加'你是专家'就有用\"",[19,7622,7623],{},"角色设定只在角色与任务相关时有效。\"你是诗人\"对代码审查没有帮助。",[95,7625,7627],{"id":7626},"_3-示例越多越好","3. \"示例越多越好\"",[19,7629,7630],{},"3-5 个精选示例通常优于 10 个冗余示例。质量 > 数量。",[95,7632,7634],{"id":7633},"_4-一个-prompt-解决所有问题","4. \"一个 prompt 解决所有问题\"",[19,7636,7637],{},"复杂任务应该用 prompt chaining 或 agent 模式拆分，而不是写一个巨型 prompt。",[95,7639,7641],{"id":7640},"_5-调好的-prompt-一直能用","5. \"调好的 prompt 一直能用\"",[19,7643,7644],{},"模型每次升级（GPT-4 → GPT-5），prompt 行为都会微变。改基础模型必须重跑评测集。",[14,7646,733],{"id":733},[43,7648,7649,7655,7661,7666],{},[46,7650,7651,7652,7654],{},"进阶视角：",[31,7653,1473],{"href":1472},"——Prompt 是其中一部分",[46,7656,7657,7658,7660],{},"结构化输出：",[31,7659,757],{"href":756}," 的 Structured Outputs",[46,7662,7663,7664],{},"Agent 中的 prompt：",[31,7665,749],{"href":748},[46,7667,4395,7668],{},[31,7669,4352],{"href":4351},[760,7671,2037],{},{"title":108,"searchDepth":140,"depth":140,"links":7673},[7674,7675,7683,7684,7685,7686,7692,7698,7702,7709],{"id":6988,"depth":123,"text":6989},{"id":7001,"depth":123,"text":7001,"children":7676},[7677,7678,7679,7680,7681,7682],{"id":7004,"depth":140,"text":7005},{"id":7017,"depth":140,"text":7018},{"id":7030,"depth":140,"text":7031},{"id":7046,"depth":140,"text":7047},{"id":7069,"depth":140,"text":7070},{"id":7082,"depth":140,"text":7083},{"id":7095,"depth":123,"text":7096},{"id":7247,"depth":123,"text":7248},{"id":7430,"depth":123,"text":7431},{"id":7463,"depth":123,"text":7463,"children":7687},[7688,7689,7690,7691],{"id":7466,"depth":140,"text":7467},{"id":7473,"depth":140,"text":7474},{"id":7483,"depth":140,"text":7484},{"id":7490,"depth":140,"text":7491},{"id":7503,"depth":123,"text":7504,"children":7693},[7694,7695,7696,7697],{"id":7510,"depth":140,"text":7511},{"id":7524,"depth":140,"text":7525},{"id":7540,"depth":140,"text":7541},{"id":7547,"depth":140,"text":7548},{"id":7563,"depth":123,"text":7564,"children":7699},[7700,7701],{"id":7567,"depth":140,"text":7568},{"id":7600,"depth":140,"text":7601},{"id":3355,"depth":123,"text":3355,"children":7703},[7704,7705,7706,7707,7708],{"id":7612,"depth":140,"text":7613},{"id":7619,"depth":140,"text":7620},{"id":7626,"depth":140,"text":7627},{"id":7633,"depth":140,"text":7634},{"id":7640,"depth":140,"text":7641},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Fprompt-engineering",[1502,1503],[2062,1507],{"title":6983,"description":108},"prompt-engineering","wiki\u002Fprompt-engineering","通过精心设计提示词来引导 LLM 生成高质量输出的技术，包括 few-shot、CoT、角色设定、结构化输出等方法。",[3342,7719,7720,7596],"提示词","CoT","WvZdrEDRd2GefeD5a7RR5CZpK25BZFdxSt6HqSH4IQo",{"id":7723,"title":7724,"body":7725,"category":8412,"description":108,"extension":780,"meta":8413,"navigation":310,"path":8414,"published":783,"relatedModels":784,"relatedTools":8415,"seo":8416,"slug":8417,"stem":8418,"summary":8419,"tags":8420,"updated":783,"__hash__":8424},"wiki\u002Fwiki\u002Frag.md","RAG (检索增强生成)",{"type":11,"value":7726,"toc":8388},[7727,7731,7734,7740,7742,7745,7766,7769,7772,7778,7781,7783,7786,7854,7858,7863,7913,7916,7919,7939,7942,7946,7950,7953,7957,7960,7986,7988,7991,8002,8006,8013,8019,8022,8026,8029,8035,8038,8042,8045,8049,8052,8134,8137,8157,8162,8166,8169,8212,8216,8219,8259,8263,8293,8297,8356,8364,8366],[14,7728,7730],{"id":7729},"什么是-rag","什么是 RAG",[19,7732,7733],{},"RAG（Retrieval-Augmented Generation，检索增强生成）是一种 AI 架构模式：在让大模型生成回答之前，先从外部知识库中检索相关信息，把检索到的内容作为上下文喂给模型。",[19,7735,7736,7737,347],{},"简单说就是：",[23,7738,7739],{},"先查资料，再回答",[14,7741,38],{"id":38},[19,7743,7744],{},"大模型有三个固有缺陷，RAG 可以缓解：",[59,7746,7747,7752,7760],{},[46,7748,7749,7751],{},[23,7750,4514],{}," — 模型训练数据有截止日期，不知道最新信息",[46,7753,7754,7759],{},[23,7755,7756],{},[31,7757,5060],{"href":7758},"\u002Fwiki\u002Fhallucination.html"," — 模型会一本正经地编造不存在的信息",[46,7761,7762,7765],{},[23,7763,7764],{},"领域知识不足"," — 通用模型不了解企业内部文档、私有数据",[19,7767,7768],{},"RAG 通过引入外部知识库，让模型基于真实文档回答，大幅降低幻觉率。",[14,7770,7771],{"id":7771},"工作流程",[103,7773,7776],{"className":7774,"code":7775,"language":220},[218],"用户提问\n  ↓\n① 查询向量化 — 把问题转成向量\n  ↓\n② 向量检索 — 从知识库找最相关的文档片段\n  ↓\n③ 组装上下文 — 问题 + 检索到的文档片段 → 组合 prompt\n  ↓\n④ LLM 生成 — 大模型基于上下文生成回答\n  ↓\n⑤ 引用标注 — 标注回答来源（哪个文档的哪一段）\n",[110,7777,7775],{"__ignoreMap":108},[14,7779,7780],{"id":7780},"关键组件",[95,7782,2618],{"id":2618},[19,7784,7785],{},"存储文档的向量表示，支持相似度检索：",[357,7787,7788,7799],{},[360,7789,7790],{},[363,7791,7792,7795,7797],{},[366,7793,7794],{},"向量库",[366,7796,1215],{},[366,7798,426],{},[375,7800,7801,7809,7817,7826,7836,7844],{},[363,7802,7803,7805,7807],{},[380,7804,2638],{},[380,7806,2641],{},[380,7808,2644],{},[363,7810,7811,7813,7815],{},[380,7812,2649],{},[380,7814,2652],{},[380,7816,2655],{},[363,7818,7819,7821,7824],{},[380,7820,2660],{},[380,7822,7823],{},"分布式、大规模",[380,7825,2666],{},[363,7827,7828,7830,7833],{},[380,7829,2671],{},[380,7831,7832],{},"托管 SaaS",[380,7834,7835],{},"免运维",[363,7837,7838,7840,7842],{},[380,7839,2682],{},[380,7841,2685],{},[380,7843,2688],{},[363,7845,7846,7849,7851],{},[380,7847,7848],{},"SQLite-VSS \u002F libsql",[380,7850,2707],{},[380,7852,7853],{},"轻量部署",[95,7855,7857],{"id":7856},"embedding-模型","Embedding 模型",[19,7859,7860,7861,2524],{},"把文本转成向量，详见 ",[31,7862,2028],{"href":2027},[357,7864,7865,7875],{},[360,7866,7867],{},[363,7868,7869,7871,7873],{},[366,7870,2148],{},[366,7872,368],{},[366,7874,1215],{},[375,7876,7877,7887,7897,7905],{},[363,7878,7879,7882,7885],{},[380,7880,7881],{},"OpenAI text-embedding-3",[380,7883,7884],{},"1536\u002F3072",[380,7886,2168],{},[363,7888,7889,7892,7894],{},[380,7890,7891],{},"BGE-large-zh",[380,7893,2193],{},[380,7895,7896],{},"中文效果好",[363,7898,7899,7901,7903],{},[380,7900,2204],{},[380,7902,2193],{},[380,7904,2209],{},[363,7906,7907,7909,7911],{},[380,7908,2217],{},[380,7910,2193],{},[380,7912,2222],{},[95,7914,7915],{"id":7915},"文档分块",[19,7917,7918],{},"把长文档切成小块（chunk）是 RAG 的关键步骤：",[43,7920,7921,7927,7933],{},[46,7922,7923,7926],{},[23,7924,7925],{},"固定大小分块"," — 每 500-1000 token 一块，简单粗暴",[46,7928,7929,7932],{},[23,7930,7931],{},"语义分块"," — 按段落\u002F章节自然边界切分",[46,7934,7935,7938],{},[23,7936,7937],{},"滑动窗口"," — 相邻块有重叠（如 200 token），避免切断上下文",[19,7940,7941],{},"分块太短 → 检索准但上下文不足\n分块太长 → 上下文全但检索精度低",[14,7943,7945],{"id":7944},"rag-进阶模式","RAG 进阶模式",[95,7947,7949],{"id":7948},"朴素-rag","朴素 RAG",[19,7951,7952],{},"基础流程：query → embed → search → stuff → generate。简单但不够精准。",[95,7954,7956],{"id":7955},"高级-rag","高级 RAG",[19,7958,7959],{},"在基础流程上增加优化：",[43,7961,7962,7968,7974,7980],{},[46,7963,7964,7967],{},[23,7965,7966],{},"查询改写（Query Rewriting）"," — LLM 先把用户问题改写成更适合检索的形式",[46,7969,7970,7973],{},[23,7971,7972],{},"重排序（Reranking）"," — 向量检索后用 Cross-Encoder 重排序",[46,7975,7976,7979],{},[23,7977,7978],{},"多路召回（Hybrid Search）"," — 同时用向量检索 + BM25 关键词检索",[46,7981,7982,7985],{},[23,7983,7984],{},"上下文压缩"," — 检索后先摘要再喂给 LLM",[95,7987,2506],{"id":2505},[19,7989,7990],{},"传统做法：先切块再 embedding。问题是每个 chunk 失去全文上下文。",[19,7992,7993,7994,7997,7998,8001],{},"Late Chunking：先 embedding 整篇文档，",[23,7995,7996],{},"在 token-level embedding 输出后再做 pooling 切块","。每个 chunk 的向量保留了全文语境。Jina v3、BGE-M3 等模型支持。对",[23,7999,8000],{},"长文档 \u002F 需要跨段上下文","的场景效果显著提升。",[95,8003,8005],{"id":8004},"hydehypothetical-document-embeddings","HyDE（Hypothetical Document Embeddings）",[19,8007,8008,8009,8012],{},"思路：用户的问题很短、检索时和长文档难匹配。先让 LLM 基于问题\"假装回答\"一段，",[23,8010,8011],{},"用假回答去检索","——假回答和真文档语义近，召回率显著提升。",[103,8014,8017],{"className":8015,"code":8016,"language":220},[218],"用户问题 → LLM 生成假设答案 → embed 假答案 → 检索 → 真答案\n",[110,8018,8016],{"__ignoreMap":108},[19,8020,8021],{},"代价：多一次 LLM 调用 + 多一次 embedding。适合检索难命中的专业场景。",[95,8023,8025],{"id":8024},"multi-query-expansion","Multi-Query Expansion",[19,8027,8028],{},"让 LLM 把一个问题展开成多个角度的子问题，并行检索后融合：",[103,8030,8033],{"className":8031,"code":8032,"language":220},[218],"\"我们的退货政策\" →\n  ├─ \"退货流程\"\n  ├─ \"退款时效\"\n  └─ \"哪些商品不支持退货\"\n→ 三路检索 → 合并去重 → 喂给 LLM\n",[110,8034,8032],{"__ignoreMap":108},[19,8036,8037],{},"适合用户问题模糊、单次检索覆盖不全的情况。",[95,8039,8041],{"id":8040},"模块化-rag","模块化 RAG",[19,8043,8044],{},"把 RAG 拆成可替换的模块：检索、路由、融合、排序、生成。每一步都可以独立优化。",[14,8046,8048],{"id":8047},"评估指标怎么知道-rag-做得好不好","评估指标：怎么知道 RAG 做得好不好",[19,8050,8051],{},"不评估的 RAG 是黑盒。主流指标：",[357,8053,8054,8066],{},[360,8055,8056],{},[363,8057,8058,8060,8063],{},[366,8059,5527],{},[366,8061,8062],{},"衡量什么",[366,8064,8065],{},"怎么算",[375,8067,8068,8081,8095,8108,8121],{},[363,8069,8070,8075,8078],{},[380,8071,8072],{},[23,8073,8074],{},"Recall@K",[380,8076,8077],{},"检索召回率",[380,8079,8080],{},"top-K 检索结果中有多少真包含答案",[363,8082,8083,8089,8092],{},[380,8084,8085,8088],{},[23,8086,8087],{},"MRR","（Mean Reciprocal Rank）",[380,8090,8091],{},"第一个相关结果排第几",[380,8093,8094],{},"1\u002Frank 的平均值",[363,8096,8097,8102,8105],{},[380,8098,8099],{},[23,8100,8101],{},"Faithfulness",[380,8103,8104],{},"答案是否忠于检索内容",[380,8106,8107],{},"LLM-as-judge 逐句核对",[363,8109,8110,8115,8118],{},[380,8111,8112],{},[23,8113,8114],{},"Answer Relevance",[380,8116,8117],{},"答案是否回应了问题",[380,8119,8120],{},"LLM-as-judge 打分",[363,8122,8123,8128,8131],{},[380,8124,8125],{},[23,8126,8127],{},"Context Precision",[380,8129,8130],{},"检索内容有多大比例真用上了",[380,8132,8133],{},"看 LLM 生成里引用了哪些",[19,8135,8136],{},"主流工具：",[43,8138,8139,8145,8151],{},[46,8140,8141,8144],{},[23,8142,8143],{},"RAGAS"," — 开源 RAG 评测框架，前 4 个指标都有内置",[46,8146,8147,8150],{},[23,8148,8149],{},"TruLens"," — RAG 三角评估（context relevance \u002F groundedness \u002F answer relevance）",[46,8152,8153,8156],{},[23,8154,8155],{},"DeepEval"," — pytest 风格的 LLM 评测",[19,8158,8159,8161],{},[23,8160,3991],{},"：先打 100 条 golden case + 期望答案，跑 RAGAS 出基线。每次改 chunking \u002F embed model \u002F reranker 都跑一遍对比，别凭感觉。",[14,8163,8165],{"id":8164},"naive-rag-的常见失败案例","Naive RAG 的常见失败案例",[19,8167,8168],{},"知道怎么坏才能改对：",[59,8170,8171,8177,8186,8194,8200,8206],{},[46,8172,8173,8176],{},[23,8174,8175],{},"问题用代词，检索丢上下文","——「它的价格是多少？」「它」指什么？解决：检索前用 LLM 改写带历史的查询。",[46,8178,8179,8182,8183,8185],{},[23,8180,8181],{},"关键词命中但语义跑偏","——搜「苹果」匹配到水果商品而不是苹果公司财报。解决：",[31,8184,2523],{"href":2027}," BM25 + 向量。",[46,8187,8188,8191,8192,347],{},[23,8189,8190],{},"检索到了但被中间遗忘","——top-10 太长，关键文档被夹中段被忽略。解决：reranker 重排把最相关放首尾，参考 ",[31,8193,1473],{"href":1472},[46,8195,8196,8199],{},[23,8197,8198],{},"空召回硬答","——知识库里压根没有，模型不老实拒答反而幻觉。解决：检索分数低于阈值直接回退到\"未找到相关资料\"。",[46,8201,8202,8205],{},[23,8203,8204],{},"多文档冲突","——A 文档说 X，B 文档说 not X，模型选一个自圆其说。解决：让 LLM 显式标注冲突 + 让用户决策。",[46,8207,8208,8211],{},[23,8209,8210],{},"更新延迟","——文档更新了但索引没重建。解决：建立增量索引 pipeline + 版本号校验。",[14,8213,8215],{"id":8214},"生产-rag-的监控清单","生产 RAG 的监控清单",[19,8217,8218],{},"上线后要持续盯的信号：",[43,8220,8221,8227,8233,8239,8245,8251],{},[46,8222,8223,8226],{},[23,8224,8225],{},"检索分数分布","——突然下移说明 query 模式变了",[46,8228,8229,8232],{},[23,8230,8231],{},"空召回率","——多少请求没拿到 top-K 任何文档",[46,8234,8235,8238],{},[23,8236,8237],{},"延迟分布","——embedding \u002F 向量检索 \u002F rerank \u002F LLM 各阶段 p50\u002Fp95\u002Fp99",[46,8240,8241,8244],{},[23,8242,8243],{},"答案长度异常","——突然变短可能是模型在拒答，变长可能是幻觉扩写",[46,8246,8247,8250],{},[23,8248,8249],{},"用户反馈"," 👍\u002F👎 ——把负反馈样本回流到评测集",[46,8252,8253,8256,8257],{},[23,8254,8255],{},"token 成本","——RAG 把 prompt 撑长，成本容易失控，详见 ",[31,8258,2016],{"href":2015},[14,8260,8262],{"id":8261},"在哪些工具中用到-rag","在哪些工具中用到 RAG",[43,8264,8265,8270,8275,8281,8287],{},[46,8266,8267,8269],{},[23,8268,1233],{}," — 内置 RAG 流程，上传文档自动建索引",[46,8271,8272,8274],{},[23,8273,1244],{}," — 知识库为核心，RAG 优先设计",[46,8276,8277,8280],{},[23,8278,8279],{},"Coze"," — 知识库功能，支持自动分块",[46,8282,8283,8286],{},[23,8284,8285],{},"LangChain \u002F LlamaIndex"," — RAG 开发框架",[46,8288,8289,8292],{},[23,8290,8291],{},"各种客服 Bot \u002F 企业知识助手"," — RAG 是标准架构",[14,8294,8296],{"id":8295},"rag-vs-fine-tuning","RAG vs Fine-tuning",[357,8298,8299,8309],{},[360,8300,8301],{},[363,8302,8303,8305,8307],{},[366,8304,368],{},[366,8306,2024],{},[366,8308,2858],{},[375,8310,8311,8320,8330,8338,8348],{},[363,8312,8313,8315,8318],{},[380,8314,2899],{},[380,8316,8317],{},"实时（更新文档即可）",[380,8319,2902],{},[363,8321,8322,8324,8327],{},[380,8323,2910],{},[380,8325,8326],{},"低（只需检索+推理）",[380,8328,8329],{},"高（需要 GPU 训练）",[363,8331,8332,8334,8336],{},[380,8333,2943],{},[380,8335,2949],{},[380,8337,2946],{},[363,8339,8340,8342,8345],{},[380,8341,426],{},[380,8343,8344],{},"事实性问答、知识检索",[380,8346,8347],{},"调整模型风格\u002F格式",[363,8349,8350,8352,8354],{},[380,8351,2954],{},[380,8353,2960],{},[380,8355,2957],{},[19,8357,8358,8361,8362,347],{},[23,8359,8360],{},"建议","：大多数企业知识问答场景用 RAG，不要 fine-tune。详细对比见 ",[31,8363,2843],{"href":5023},[14,8365,733],{"id":733},[43,8367,8368,8373,8377,8383],{},[46,8369,8370,8371],{},"底层基础：",[31,8372,2028],{"href":2027},[46,8374,2785,8375],{},[31,8376,1473],{"href":1472},[46,8378,8379,8380],{},"幻觉缓解：",[31,8381,8382],{"href":7758},"Hallucination",[46,8384,8385,8386],{},"与微调对比：",[31,8387,2843],{"href":5023},{"title":108,"searchDepth":140,"depth":140,"links":8389},[8390,8391,8392,8393,8398,8406,8407,8408,8409,8410,8411],{"id":7729,"depth":123,"text":7730},{"id":38,"depth":123,"text":38},{"id":7771,"depth":123,"text":7771},{"id":7780,"depth":123,"text":7780,"children":8394},[8395,8396,8397],{"id":2618,"depth":140,"text":2618},{"id":7856,"depth":140,"text":7857},{"id":7915,"depth":140,"text":7915},{"id":7944,"depth":123,"text":7945,"children":8399},[8400,8401,8402,8403,8404,8405],{"id":7948,"depth":140,"text":7949},{"id":7955,"depth":140,"text":7956},{"id":2505,"depth":140,"text":2506},{"id":8004,"depth":140,"text":8005},{"id":8024,"depth":140,"text":8025},{"id":8040,"depth":140,"text":8041},{"id":8047,"depth":123,"text":8048},{"id":8164,"depth":123,"text":8165},{"id":8214,"depth":123,"text":8215},{"id":8261,"depth":123,"text":8262},{"id":8295,"depth":123,"text":8296},{"id":733,"depth":123,"text":733},"architecture",{},"\u002Fwiki\u002Frag",[787,2832,786],{"title":7724,"description":108},"rag","wiki\u002Frag","Retrieval-Augmented Generation，让 AI 在生成回答前先从知识库检索相关信息，解决大模型知识过时和幻觉问题。",[2024,8421,8422,8423],"架构","知识库","向量检索","5XqkMX0BGIsSkQIeRVDiX5NwHBMAsd9bzBk6QMZuWF8",{"id":8426,"title":8427,"body":8428,"category":1498,"description":108,"extension":780,"meta":9176,"navigation":310,"path":9177,"published":783,"relatedModels":9178,"relatedTools":784,"seo":9179,"slug":9180,"stem":9181,"summary":9182,"tags":9183,"updated":783,"__hash__":9186},"wiki\u002Fwiki\u002Ftemperature-top-p.md","Temperature 与 Top-P（采样参数）",{"type":11,"value":8429,"toc":9147},[8430,8434,8437,8451,8455,8457,8460,8466,8480,8483,8552,8555,8558,8578,8582,8585,8591,8602,8608,8611,8668,8672,8675,8681,8687,8690,8694,8701,8707,8763,8766,8779,8782,8785,8791,8794,8910,8913,8988,8998,9002,9008,9023,9026,9030,9035,9077,9086,9088,9092,9095,9099,9102,9106,9109,9113,9116,9120,9126,9128,9144],[14,8431,8433],{"id":8432},"什么是-temperature-和-top-p","什么是 Temperature 和 Top-P",[19,8435,8436],{},"Temperature 和 Top-P 是控制 LLM 输出随机性的两个参数。它们决定了模型在生成文本时\"有多保守\"或\"有多创造性\"。",[43,8438,8439,8445],{},[46,8440,8441,8444],{},[23,8442,8443],{},"Temperature"," — 调整概率分布的平坦度，值越高输出越随机",[46,8446,8447,8450],{},[23,8448,8449],{},"Top-P","（核采样） — 只从累积概率超过 P 的候选词中选，限制选择范围",[14,8452,8454],{"id":8453},"temperature-详解","Temperature 详解",[95,8456,2362],{"id":2362},[19,8458,8459],{},"模型在每一步预测下一个 token 时，会计算所有可能 token 的概率。Temperature 通过调整 logits（概率前的分数）来改变分布形状：",[103,8461,8464],{"className":8462,"code":8463,"language":220},[218],"调整后概率 = softmax(logits \u002F temperature)\n",[110,8465,8463],{"__ignoreMap":108},[43,8467,8468,8471,8474,8477],{},[46,8469,8470],{},"Temperature = 1.0：原始概率分布不变",[46,8472,8473],{},"Temperature \u003C 1.0：概率分布变\"尖锐\"——高概率词更高、低概率词更低 → 更确定",[46,8475,8476],{},"Temperature > 1.0：概率分布变\"平坦\"——各词概率更均匀 → 更随机",[46,8478,8479],{},"Temperature = 0：完全贪心——永远选概率最高的词",[95,8481,8482],{"id":8482},"实际效果",[357,8484,8485,8495],{},[360,8486,8487],{},[363,8488,8489,8491,8493],{},[366,8490,8443],{},[366,8492,1853],{},[366,8494,426],{},[375,8496,8497,8508,8519,8530,8541],{},[363,8498,8499,8502,8505],{},[380,8500,8501],{},"0",[380,8503,8504],{},"完全确定性，每次回答相同",[380,8506,8507],{},"代码生成、数据抽取、事实问答",[363,8509,8510,8513,8516],{},[380,8511,8512],{},"0.3",[380,8514,8515],{},"高度确定，偶尔有变化",[380,8517,8518],{},"代码审查、文档摘要",[363,8520,8521,8524,8527],{},[380,8522,8523],{},"0.7",[380,8525,8526],{},"平衡（大多数 API 默认值）",[380,8528,8529],{},"通用对话、问答",[363,8531,8532,8535,8538],{},[380,8533,8534],{},"1.0",[380,8536,8537],{},"较有创造性",[380,8539,8540],{},"文案写作、头脑风暴",[363,8542,8543,8546,8549],{},[380,8544,8545],{},"1.5+",[380,8547,8548],{},"高度随机，可能出现乱码",[380,8550,8551],{},"创意写作（慎用）",[95,8553,8554],{"id":8554},"示例",[19,8556,8557],{},"同一个 prompt \"写一首关于秋天的诗\"：",[43,8559,8560,8566,8572],{},[46,8561,8562,8565],{},[23,8563,8564],{},"Temp 0","：每次生成完全相同的诗",[46,8567,8568,8571],{},[23,8569,8570],{},"Temp 0.7","：每次不同的诗，但风格相似",[46,8573,8574,8577],{},[23,8575,8576],{},"Temp 1.5","：每次差异巨大，可能出现非常规表达",[14,8579,8581],{"id":8580},"top-p-详解","Top-P 详解",[95,8583,2362],{"id":8584},"原理-1",[19,8586,8587,8588,2524],{},"Top-P（nucleus sampling，核采样）不是调整概率分布，而是",[23,8589,8590],{},"限制候选范围",[59,8592,8593,8596,8599],{},[46,8594,8595],{},"把所有候选 token 按概率从高到低排序",[46,8597,8598],{},"累积概率，直到达到 P 值",[46,8600,8601],{},"只从这些 token 中采样",[103,8603,8606],{"className":8604,"code":8605,"language":220},[218],"P = 0.9 → 只从累积概率达 90% 的最可能 token 中选\nP = 0.1 → 只选概率最高的极少数 token\nP = 1.0 → 不限制，所有 token 都可能被选\n",[110,8607,8605],{"__ignoreMap":108},[95,8609,8482],{"id":8610},"实际效果-1",[357,8612,8613,8623],{},[360,8614,8615],{},[363,8616,8617,8619,8621],{},[366,8618,8449],{},[366,8620,1853],{},[366,8622,426],{},[375,8624,8625,8636,8647,8658],{},[363,8626,8627,8630,8633],{},[380,8628,8629],{},"0.1",[380,8631,8632],{},"非常保守",[380,8634,8635],{},"代码生成、事实问答",[363,8637,8638,8641,8644],{},[380,8639,8640],{},"0.5",[380,8642,8643],{},"较保守",[380,8645,8646],{},"文档摘要、分类",[363,8648,8649,8652,8655],{},[380,8650,8651],{},"0.9",[380,8653,8654],{},"平衡（默认）",[380,8656,8657],{},"通用对话",[363,8659,8660,8662,8665],{},[380,8661,8534],{},[380,8663,8664],{},"不限制",[380,8666,8667],{},"创意写作",[14,8669,8671],{"id":8670},"min_p新一代采样","min_p：新一代采样",[19,8673,8674],{},"2024 年后流行的第三个参数，部分开源推理框架（vLLM \u002F llama.cpp \u002F SGLang）和一些 API 已支持。",[19,8676,8677,8680],{},[23,8678,8679],{},"思路","：top-p 在\"概率分布很尖\"时会留太多噪音 token；min_p 设一个相对阈值——只要 token 的概率不低于「最高概率 token × min_p」就保留。",[103,8682,8685],{"className":8683,"code":8684,"language":220},[218],"min_p = 0.05 表示：保留所有概率 ≥ 0.05 × max_prob 的 token\n",[110,8686,8684],{"__ignoreMap":108},[19,8688,8689],{},"实测在创意写作场景，min_p=0.05~0.1 比 top_p=0.9 输出质量更稳定（既不过度保守也不会跑飞）。新模型推理时可以试试。",[14,8691,8693],{"id":8692},"seed-与确定性","seed 与确定性",[19,8695,8696,8697,8700],{},"Temperature=0 ",[23,8698,8699],{},"不等于"," 完全确定性。同一个 temperature=0 的请求，OpenAI \u002F Anthropic 多次跑结果有时仍不同——浮点运算非确定性 + batch 调度差异导致。",[19,8702,8703,8704,2524],{},"要更稳的复现，可以传 ",[110,8705,8706],{},"seed",[103,8708,8710],{"className":3691,"code":8709,"language":3693,"meta":108,"style":108},"# OpenAI\nclient.chat.completions.create(\n    model=\"gpt-5\", temperature=0, seed=42, ...\n)\n# 响应里会带 system_fingerprint，相同 fingerprint + seed 才能保证完全一致\n",[110,8711,8712,8717,8722,8754,8758],{"__ignoreMap":108},[113,8713,8714],{"class":115,"line":116},[113,8715,8716],{"class":3716},"# OpenAI\n",[113,8718,8719],{"class":115,"line":123},[113,8720,8721],{"class":119},"client.chat.completions.create(\n",[113,8723,8724,8727,8729,8732,8734,8736,8738,8740,8742,8744,8746,8749,8751],{"class":115,"line":140},[113,8725,8726],{"class":5890},"    model",[113,8728,3704],{"class":3703},[113,8730,8731],{"class":133},"\"gpt-5\"",[113,8733,165],{"class":119},[113,8735,4347],{"class":5890},[113,8737,3704],{"class":3703},[113,8739,8501],{"class":126},[113,8741,165],{"class":119},[113,8743,8706],{"class":5890},[113,8745,3704],{"class":3703},[113,8747,8748],{"class":126},"42",[113,8750,165],{"class":119},[113,8752,8753],{"class":126},"...\n",[113,8755,8756],{"class":115,"line":153},[113,8757,5875],{"class":119},[113,8759,8760],{"class":115,"line":174},[113,8761,8762],{"class":3716},"# 响应里会带 system_fingerprint，相同 fingerprint + seed 才能保证完全一致\n",[19,8764,8765],{},"注意：",[43,8767,8768,8773,8776],{},[46,8769,8770,8772],{},[110,8771,8706],{}," 是 best-effort，模型升级 \u002F 基础设施变动会让 fingerprint 变",[46,8774,8775],{},"Anthropic \u002F Google 早期不支持 seed，新版本逐步加入",[46,8777,8778],{},"真的要 100% 确定性（比如单元测试），用 mock 替代 LLM 调用",[14,8780,8781],{"id":8781},"怎么搭配使用",[95,8783,8784],{"id":8784},"一般原则",[19,8786,8787,8790],{},[23,8788,8789],{},"不要同时调两个","。OpenAI 官方建议：要么调 Temperature，要么调 Top-P，不要同时改。",[95,8792,8793],{"id":8793},"推荐配置",[357,8795,8796,8809],{},[360,8797,8798],{},[363,8799,8800,8802,8804,8806],{},[366,8801,5978],{},[366,8803,8443],{},[366,8805,8449],{},[366,8807,8808],{},"理由",[375,8810,8811,8824,8837,8849,8862,8874,8885,8897],{},[363,8812,8813,8816,8818,8821],{},[380,8814,8815],{},"代码生成",[380,8817,8501],{},[380,8819,8820],{},"1",[380,8822,8823],{},"完全确定，代码不应有\"创意\"",[363,8825,8826,8829,8832,8834],{},[380,8827,8828],{},"代码审查",[380,8830,8831],{},"0.2",[380,8833,8820],{},[380,8835,8836],{},"高度确定，偶尔看不同角度",[363,8838,8839,8842,8844,8846],{},[380,8840,8841],{},"数据抽取",[380,8843,8501],{},[380,8845,8820],{},[380,8847,8848],{},"严格按格式输出",[363,8850,8851,8854,8857,8859],{},[380,8852,8853],{},"工具调用（Function Calling）",[380,8855,8856],{},"0 ~ 0.2",[380,8858,8820],{},[380,8860,8861],{},"见下节",[363,8863,8864,8867,8869,8871],{},[380,8865,8866],{},"客服 Bot",[380,8868,8640],{},[380,8870,8651],{},[380,8872,8873],{},"适度变化，但不跑题",[363,8875,8876,8878,8880,8882],{},[380,8877,8657],{},[380,8879,8523],{},[380,8881,8820],{},[380,8883,8884],{},"平衡",[363,8886,8887,8890,8892,8894],{},[380,8888,8889],{},"文案写作",[380,8891,8651],{},[380,8893,8820],{},[380,8895,8896],{},"需要创意",[363,8898,8899,8902,8905,8907],{},[380,8900,8901],{},"头脑风暴",[380,8903,8904],{},"1.2",[380,8906,8820],{},[380,8908,8909],{},"越发散越好",[95,8911,8912],{"id":8912},"各平台默认值",[357,8914,8915,8930],{},[360,8916,8917],{},[363,8918,8919,8921,8924,8927],{},[366,8920,1209],{},[366,8922,8923],{},"默认 Temperature",[366,8925,8926],{},"默认 Top-P",[366,8928,8929],{},"备注",[375,8931,8932,8943,8954,8965,8977],{},[363,8933,8934,8937,8939,8941],{},[380,8935,8936],{},"OpenAI API",[380,8938,8534],{},[380,8940,8534],{},[380,8942,2465],{},[363,8944,8945,8948,8950,8952],{},[380,8946,8947],{},"Anthropic API",[380,8949,8534],{},[380,8951,8534],{},[380,8953,2465],{},[363,8955,8956,8958,8960,8963],{},[380,8957,3848],{},[380,8959,8534],{},[380,8961,8962],{},"0.95",[380,8964,2465],{},[363,8966,8967,8970,8972,8974],{},[380,8968,8969],{},"DeepSeek API",[380,8971,8534],{},[380,8973,8534],{},[380,8975,8976],{},"官方文档建议代码 0.0 \u002F 通用 1.3",[363,8978,8979,8982,8984,8986],{},[380,8980,8981],{},"国内 GLM",[380,8983,8962],{},[380,8985,8523],{},[380,8987,2465],{},[19,8989,8990,8993,8994,8997],{},[23,8991,8992],{},"踩坑","：很多人以为默认是 0.7，其实主流大厂默认是 1.0。如果你的应用想要保守输出，",[23,8995,8996],{},"必须显式设置","，不要假设默认值。",[14,8999,9001],{"id":9000},"推理模型为什么不建议改-temperature","推理模型为什么不建议改 temperature",[19,9003,9004,9005,2524],{},"GPT-5 reasoning、Claude Opus 4 thinking、DeepSeek-R1、o3 等推理模型，官方都建议",[23,9006,9007],{},"保持默认 temperature=1.0",[43,9009,9010,9013,9020],{},[46,9011,9012],{},"推理模型的思维链本身依赖概率采样的多样性来\"探索\"不同解法",[46,9014,9015,9016,9019],{},"强制 t=0 会让推理路径单一，反而",[23,9017,9018],{},"降低","复杂问题的成功率",[46,9021,9022],{},"思维链长度 × 低多样性 = 容易陷入死循环",[19,9024,9025],{},"对推理模型，控制输出应该靠 prompt 描述目标和约束，而不是调采样参数。",[14,9027,9029],{"id":9028},"温度对-function-calling-稳定性的影响","温度对 Function Calling 稳定性的影响",[19,9031,9032,9034],{},[31,9033,429],{"href":756},"场景，temperature 影响很关键：",[357,9036,9037,9046],{},[360,9038,9039],{},[363,9040,9041,9043],{},[366,9042,8443],{},[366,9044,9045],{},"影响",[375,9047,9048,9055,9062,9069],{},[363,9049,9050,9052],{},[380,9051,8501],{},[380,9053,9054],{},"工具选择最稳定，参数提取准确率最高",[363,9056,9057,9059],{},[380,9058,8831],{},[380,9060,9061],{},"几乎和 0 一样，偶尔在多个相近工具间选",[363,9063,9064,9066],{},[380,9065,8523],{},[380,9067,9068],{},"模型可能\"灵活\"地选不太对的工具、参数飘",[363,9070,9071,9074],{},[380,9072,9073],{},"1.0+",[380,9075,9076],{},"工具调用稳定性显著下降，生产慎用",[19,9078,9079,9081,9082,9085],{},[23,9080,3991],{},"：所有工具调用场景，先把 temperature 设 0 验证基线效果，再视情况微调。",[23,9083,9084],{},"别用默认 1.0 直接上工具调用","，调试起来很痛苦。",[14,9087,3355],{"id":3355},[95,9089,9091],{"id":9090},"误区-1-temperature-0-就不会出错","误区 1: \"Temperature = 0 就不会出错\"",[19,9093,9094],{},"Temperature = 0 只保证（接近）确定性，不保证正确性。模型可能每次都\"确定地\"给出错误答案。",[95,9096,9098],{"id":9097},"误区-2-调高-temperature-模型更聪明","误区 2: \"调高 Temperature 模型更聪明\"",[19,9100,9101],{},"Temperature 调高只是让输出更随机，不会让模型变聪明。高 Temperature 反而可能导致逻辑混乱。",[95,9103,9105],{"id":9104},"误区-3-top-p-和-temperature-效果一样","误区 3: \"Top-P 和 Temperature 效果一样\"",[19,9107,9108],{},"不完全一样。Top-P 是硬截断（低概率词完全排除），Temperature 是软调整（低概率词概率降低但不为 0）。",[95,9110,9112],{"id":9111},"误区-4-推理模型也要-temp0","误区 4: \"推理模型也要 temp=0\"",[19,9114,9115],{},"错。推理模型靠思维链探索，强制 0 会让它失去推理多样性，反而表现下降。",[95,9117,9119],{"id":9118},"误区-5-改-temperature-可以解决幻觉","误区 5: \"改 temperature 可以解决幻觉\"",[19,9121,9122,9123,9125],{},"只能轻微缓解。",[31,9124,5060],{"href":7758},"的根本解法是 RAG + grounded generation，不是调采样参数。",[14,9127,733],{"id":733},[43,9129,9130,9135,9140],{},[46,9131,9132,9133],{},"输出控制：",[31,9134,1934],{"href":2008},[46,9136,9137,9138],{},"工具稳定性：",[31,9139,757],{"href":756},[46,9141,8379,9142],{},[31,9143,8382],{"href":7758},[760,9145,9146],{},"html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":108,"searchDepth":140,"depth":140,"links":9148},[9149,9150,9155,9159,9160,9161,9166,9167,9168,9175],{"id":8432,"depth":123,"text":8433},{"id":8453,"depth":123,"text":8454,"children":9151},[9152,9153,9154],{"id":2362,"depth":140,"text":2362},{"id":8482,"depth":140,"text":8482},{"id":8554,"depth":140,"text":8554},{"id":8580,"depth":123,"text":8581,"children":9156},[9157,9158],{"id":8584,"depth":140,"text":2362},{"id":8610,"depth":140,"text":8482},{"id":8670,"depth":123,"text":8671},{"id":8692,"depth":123,"text":8693},{"id":8781,"depth":123,"text":8781,"children":9162},[9163,9164,9165],{"id":8784,"depth":140,"text":8784},{"id":8793,"depth":140,"text":8793},{"id":8912,"depth":140,"text":8912},{"id":9000,"depth":123,"text":9001},{"id":9028,"depth":123,"text":9029},{"id":3355,"depth":123,"text":3355,"children":9169},[9170,9171,9172,9173,9174],{"id":9090,"depth":140,"text":9091},{"id":9097,"depth":140,"text":9098},{"id":9104,"depth":140,"text":9105},{"id":9111,"depth":140,"text":9112},{"id":9118,"depth":140,"text":9119},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Ftemperature-top-p",[1502,1503],{"title":8427,"description":108},"temperature-top-p","wiki\u002Ftemperature-top-p","控制 LLM 输出随机性的两个核心参数：Temperature 调节概率分布的平坦度，Top-P 限制候选词范围。",[8443,8449,9184,9185],"采样","参数调优","BvXFYUS66mHKafqko--Hxl2TOXnOvIwrSx9s9ZhMrBY",{"id":9188,"title":9189,"body":9190,"category":1498,"description":108,"extension":780,"meta":10064,"navigation":310,"path":10065,"published":783,"relatedModels":10066,"relatedTools":784,"seo":10067,"slug":10068,"stem":10069,"summary":10070,"tags":10071,"updated":783,"__hash__":10074},"wiki\u002Fwiki\u002Ftoken.md","Token（令牌）",{"type":11,"value":9191,"toc":10040},[9192,9196,9199,9202,9208,9212,9216,9219,9225,9229,9232,9287,9291,9294,9298,9301,9357,9360,9366,9369,9389,9393,9396,9469,9478,9482,9485,9503,9506,9664,9691,9694,9697,9776,9782,9785,9788,9842,9845,9848,9928,9931,9935,9988,9990,9994,10001,10005,10011,10015,10018,10020,10037],[14,9193,9195],{"id":9194},"什么是-token","什么是 Token",[19,9197,9198],{},"Token 是大语言模型处理文本的基本单位。模型不是逐字符或逐词处理文本，而是把文本切分成一个个 token。",[19,9200,9201],{},"对于英文：1 个 token ≈ 0.75 个单词（4 个字符）\n对于中文：1 个 token ≈ 1-2 个汉字",[103,9203,9206],{"className":9204,"code":9205,"language":220},[218],"英文: \"Hello world\"        → [\"Hello\", \" world\"]           → 2 tokens\n中文: \"你好世界\"           → [\"你好\", \"世界\"]               → 2-4 tokens\n代码: \"function add(a,b)\"  → [\"function\", \" add\", \"(\", \"a\", \",\", \"b\", \")\"] → 7 tokens\n",[110,9207,9205],{"__ignoreMap":108},[14,9209,9211],{"id":9210},"为什么-token-重要","为什么 Token 重要",[95,9213,9215],{"id":9214},"_1-决定-api-费用","1. 决定 API 费用",[19,9217,9218],{},"所有 LLM API 都按 token 计费。Token 越多，费用越高。",[103,9220,9223],{"className":9221,"code":9222,"language":220},[218],"100 万 token ≈ 75 万英文单词 ≈ 50-100 万汉字\n",[110,9224,9222],{"__ignoreMap":108},[95,9226,9228],{"id":9227},"_2-决定上下文窗口","2. 决定上下文窗口",[19,9230,9231],{},"模型能处理的上下文长度以 token 为单位：",[357,9233,9234,9245],{},[360,9235,9236],{},[363,9237,9238,9240,9242],{},[366,9239,2148],{},[366,9241,1399],{},[366,9243,9244],{},"约等于",[375,9246,9247,9257,9267,9277],{},[363,9248,9249,9251,9254],{},[380,9250,4844],{},[380,9252,9253],{},"128K",[380,9255,9256],{},"一本中篇小说",[363,9258,9259,9261,9264],{},[380,9260,3928],{},[380,9262,9263],{},"200K",[380,9265,9266],{},"一本长篇小说",[363,9268,9269,9271,9274],{},[380,9270,4834],{},[380,9272,9273],{},"400K",[380,9275,9276],{},"两部长篇小说",[363,9278,9279,9281,9284],{},[380,9280,3941],{},[380,9282,9283],{},"1M",[380,9285,9286],{},"一套百科全书",[95,9288,9290],{"id":9289},"_3-决定速度","3. 决定速度",[19,9292,9293],{},"Token 越多，推理时间越长。生成 1000 token 大约需要 3-10 秒（取决于模型）。",[14,9295,9297],{"id":9296},"input-output-cache-三种价格","Input \u002F Output \u002F Cache 三种价格",[19,9299,9300],{},"API 计费早就不是\"按 token 一口价\"了，常见至少分三档：",[357,9302,9303,9316],{},[360,9304,9305],{},[363,9306,9307,9310,9313],{},[366,9308,9309],{},"类别",[366,9311,9312],{},"含义",[366,9314,9315],{},"相对价格（粗略）",[375,9317,9318,9331,9344],{},[363,9319,9320,9325,9328],{},[380,9321,9322],{},[23,9323,9324],{},"Input",[380,9326,9327],{},"你发送给模型的 prompt（含 system + history + query）",[380,9329,9330],{},"1x",[363,9332,9333,9338,9341],{},[380,9334,9335],{},[23,9336,9337],{},"Output",[380,9339,9340],{},"模型生成的内容",[380,9342,9343],{},"3-5x Input",[363,9345,9346,9351,9354],{},[380,9347,9348],{},[23,9349,9350],{},"Cached Input",[380,9352,9353],{},"命中 prompt cache 的输入部分",[380,9355,9356],{},"0.1-0.5x Input",[19,9358,9359],{},"举例：Claude Sonnet 4（数量级，会变动）：",[103,9361,9364],{"className":9362,"code":9363,"language":220},[218],"Input:        $3 \u002F M token\nOutput:      $15 \u002F M token\nCache write: $3.75 \u002F M（写一次缓存，5 分钟有效，可续）\nCache read:  $0.30 \u002F M（命中后 -90%）\n",[110,9365,9363],{"__ignoreMap":108},[19,9367,9368],{},"设计应用时三句话原则：",[59,9370,9371,9377,9383],{},[46,9372,9373,9376],{},[23,9374,9375],{},"能少 output 就少 output","——Output 比 Input 贵 3-5 倍。让模型只返回必要内容、不要重复用户的话。",[46,9378,9379,9382],{},[23,9380,9381],{},"能复用 input 就开缓存","——长 system prompt、tool definitions、知识库文档放最前面，缓存一次反复用。",[46,9384,9385,9388],{},[23,9386,9387],{},"能批量就批量","——OpenAI \u002F Anthropic 都有 Batch API，24h 内出结果，价格 -50%。",[14,9390,9392],{"id":9391},"prompt-cache-计费机制","Prompt Cache 计费机制",[19,9394,9395],{},"各家细节不同，但模式接近：",[357,9397,9398,9413],{},[360,9399,9400],{},[363,9401,9402,9404,9407,9410],{},[366,9403,1209],{},[366,9405,9406],{},"触发方式",[366,9408,9409],{},"TTL",[366,9411,9412],{},"折扣",[375,9414,9415,9429,9443,9457],{},[363,9416,9417,9419,9423,9426],{},[380,9418,396],{},[380,9420,1689,9421,1693],{},[110,9422,1692],{},[380,9424,9425],{},"5 分钟（可续到 1 小时）",[380,9427,9428],{},"Read -90%",[363,9430,9431,9434,9437,9440],{},[380,9432,9433],{},"OpenAI",[380,9435,9436],{},"自动缓存 ≥1024 token 的前缀",[380,9438,9439],{},"~10 分钟",[380,9441,9442],{},"Read -50%",[363,9444,9445,9447,9451,9454],{},[380,9446,7230],{},[380,9448,1689,9449,1718],{},[110,9450,1717],{},[380,9452,9453],{},"默认 1 小时（可设）",[380,9455,9456],{},"Read -75% + 按存储时长收费",[363,9458,9459,9462,9464,9467],{},[380,9460,9461],{},"DeepSeek",[380,9463,1729],{},[380,9465,9466],{},"~ 数小时",[380,9468,9428],{},[19,9470,9471,9474,9475,347],{},[23,9472,9473],{},"生效条件","：必须前缀完全一致。哪怕在 system prompt 开头加一个时间戳，整个缓存就废了。所以",[23,9476,9477],{},"动态内容要严格放在 prompt 末尾",[14,9479,9481],{"id":9480},"token-计数工具","Token 计数工具",[95,9483,9484],{"id":9484},"在线工具",[43,9486,9487,9495],{},[46,9488,9489,9494],{},[31,9490,9493],{"href":9491,"rel":9492},"https:\u002F\u002Fplatform.openai.com\u002Ftokenizer",[6572],"OpenAI Tokenizer"," — GPT 系列分词器",[46,9496,9497,9502],{},[31,9498,9501],{"href":9499,"rel":9500},"https:\u002F\u002Ftiktokenizer.vercel.app\u002F",[6572],"Tiktokenizer"," — 支持 GPT \u002F Claude \u002F Llama \u002F DeepSeek 等多家分词器对比",[95,9504,9505],{"id":9505},"代码计数",[103,9507,9509],{"className":3691,"code":9508,"language":3693,"meta":108,"style":108},"# OpenAI 的 tiktoken 库\nimport tiktoken\nenc = tiktoken.encoding_for_model(\"gpt-4o\")\ntokens = enc.encode(\"你好世界，Hello world\")\nprint(len(tokens))  # 输出 token 数\n\n# Anthropic：通过 SDK 直接调\nfrom anthropic import Anthropic\nclient = Anthropic()\ncount = client.messages.count_tokens(\n    model=\"claude-sonnet-4\",\n    messages=[{\"role\": \"user\", \"content\": \"你好世界\"}]\n)\nprint(count.input_tokens)\n",[110,9510,9511,9516,9523,9538,9553,9570,9574,9579,9591,9601,9611,9622,9653,9657],{"__ignoreMap":108},[113,9512,9513],{"class":115,"line":116},[113,9514,9515],{"class":3716},"# OpenAI 的 tiktoken 库\n",[113,9517,9518,9520],{"class":115,"line":123},[113,9519,5840],{"class":3703},[113,9521,9522],{"class":119}," tiktoken\n",[113,9524,9525,9528,9530,9533,9536],{"class":115,"line":140},[113,9526,9527],{"class":119},"enc ",[113,9529,3704],{"class":3703},[113,9531,9532],{"class":119}," tiktoken.encoding_for_model(",[113,9534,9535],{"class":133},"\"gpt-4o\"",[113,9537,5875],{"class":119},[113,9539,9540,9543,9545,9548,9551],{"class":115,"line":153},[113,9541,9542],{"class":119},"tokens ",[113,9544,3704],{"class":3703},[113,9546,9547],{"class":119}," enc.encode(",[113,9549,9550],{"class":133},"\"你好世界，Hello world\"",[113,9552,5875],{"class":119},[113,9554,9555,9558,9561,9564,9567],{"class":115,"line":174},[113,9556,9557],{"class":126},"print",[113,9559,9560],{"class":119},"(",[113,9562,9563],{"class":126},"len",[113,9565,9566],{"class":119},"(tokens))  ",[113,9568,9569],{"class":3716},"# 输出 token 数\n",[113,9571,9572],{"class":115,"line":187},[113,9573,311],{"emptyLinePlaceholder":310},[113,9575,9576],{"class":115,"line":5},[113,9577,9578],{"class":3716},"# Anthropic：通过 SDK 直接调\n",[113,9580,9581,9583,9586,9588],{"class":115,"line":272},[113,9582,5834],{"class":3703},[113,9584,9585],{"class":119}," anthropic ",[113,9587,5840],{"class":3703},[113,9589,9590],{"class":119}," Anthropic\n",[113,9592,9593,9596,9598],{"class":115,"line":278},[113,9594,9595],{"class":119},"client ",[113,9597,3704],{"class":3703},[113,9599,9600],{"class":119}," Anthropic()\n",[113,9602,9603,9606,9608],{"class":115,"line":284},[113,9604,9605],{"class":119},"count ",[113,9607,3704],{"class":3703},[113,9609,9610],{"class":119}," client.messages.count_tokens(\n",[113,9612,9613,9615,9617,9620],{"class":115,"line":290},[113,9614,8726],{"class":5890},[113,9616,3704],{"class":3703},[113,9618,9619],{"class":133},"\"claude-sonnet-4\"",[113,9621,137],{"class":119},[113,9623,9624,9627,9629,9632,9635,9637,9640,9642,9645,9647,9650],{"class":115,"line":296},[113,9625,9626],{"class":5890},"    messages",[113,9628,3704],{"class":3703},[113,9630,9631],{"class":119},"[{",[113,9633,9634],{"class":133},"\"role\"",[113,9636,130],{"class":119},[113,9638,9639],{"class":133},"\"user\"",[113,9641,165],{"class":119},[113,9643,9644],{"class":133},"\"content\"",[113,9646,130],{"class":119},[113,9648,9649],{"class":133},"\"你好世界\"",[113,9651,9652],{"class":119},"}]\n",[113,9654,9655],{"class":115,"line":302},[113,9656,5875],{"class":119},[113,9658,9659,9661],{"class":115,"line":307},[113,9660,9557],{"class":126},[113,9662,9663],{"class":119},"(count.input_tokens)\n",[103,9665,9669],{"className":9666,"code":9667,"language":9668,"meta":108,"style":108},"language-javascript shiki shiki-themes github-light github-dark","\u002F\u002F 浏览器端\nimport { encode } from 'gpt-tokenizer'\nconst tokens = encode('你好世界，Hello world')\nconsole.log(tokens.length)\n","javascript",[110,9670,9671,9676,9681,9686],{"__ignoreMap":108},[113,9672,9673],{"class":115,"line":116},[113,9674,9675],{},"\u002F\u002F 浏览器端\n",[113,9677,9678],{"class":115,"line":123},[113,9679,9680],{},"import { encode } from 'gpt-tokenizer'\n",[113,9682,9683],{"class":115,"line":140},[113,9684,9685],{},"const tokens = encode('你好世界，Hello world')\n",[113,9687,9688],{"class":115,"line":153},[113,9689,9690],{},"console.log(tokens.length)\n",[14,9692,9693],{"id":9693},"不同模型的分词差异",[19,9695,9696],{},"不同模型使用不同的分词器，同样的文本 token 数可能差异巨大：",[357,9698,9699,9715],{},[360,9700,9701],{},[363,9702,9703,9706,9708,9711,9713],{},[366,9704,9705],{},"文本",[366,9707,4844],{},[366,9709,9710],{},"Claude",[366,9712,7230],{},[366,9714,9461],{},[375,9716,9717,9731,9744,9760],{},[363,9718,9719,9722,9725,9727,9729],{},[380,9720,9721],{},"\"Hello world\"",[380,9723,9724],{},"2",[380,9726,9724],{},[380,9728,9724],{},[380,9730,9724],{},[363,9732,9733,9735,9738,9740,9742],{},[380,9734,9649],{},[380,9736,9737],{},"4",[380,9739,9724],{},[380,9741,9737],{},[380,9743,9724],{},[363,9745,9746,9749,9752,9755,9758],{},[380,9747,9748],{},"1000 行 Python 代码",[380,9750,9751],{},"~8000",[380,9753,9754],{},"~7500",[380,9756,9757],{},"~9000",[380,9759,9754],{},[363,9761,9762,9765,9768,9771,9773],{},[380,9763,9764],{},"1000 字中文文档",[380,9766,9767],{},"~1700",[380,9769,9770],{},"~1200",[380,9772,9767],{},[380,9774,9775],{},"~1100",[19,9777,9778,9781],{},[23,9779,9780],{},"结论","：中文场景下 Claude \u002F DeepSeek \u002F Qwen 这类有中文优化的 tokenizer 比 GPT 省 30-50% token。同样的对话，GPT 跑下来可能比 Claude 贵不少——尤其是中文 input 多的场景。",[14,9783,9784],{"id":9784},"实用估算",[19,9786,9787],{},"快速估算 token 数量的经验法则：",[357,9789,9790,9800],{},[360,9791,9792],{},[363,9793,9794,9797],{},[366,9795,9796],{},"文本类型",[366,9798,9799],{},"1K token ≈",[375,9801,9802,9810,9818,9826,9834],{},[363,9803,9804,9807],{},[380,9805,9806],{},"英文文本",[380,9808,9809],{},"750 单词",[363,9811,9812,9815],{},[380,9813,9814],{},"中文文本",[380,9816,9817],{},"500-700 汉字",[363,9819,9820,9823],{},[380,9821,9822],{},"代码",[380,9824,9825],{},"30-50 行",[363,9827,9828,9831],{},[380,9829,9830],{},"JSON",[380,9832,9833],{},"100-200 行",[363,9835,9836,9839],{},[380,9837,9838],{},"Markdown",[380,9840,9841],{},"500-700 字",[14,9843,9844],{"id":9844},"上下文压缩策略",[19,9846,9847],{},"长对话 \u002F Agent 长任务里 token 涨得最快。常见压缩手段：",[357,9849,9850,9862],{},[360,9851,9852],{},[363,9853,9854,9857,9859],{},[366,9855,9856],{},"策略",[366,9858,1976],{},[366,9860,9861],{},"副作用",[375,9863,9864,9876,9889,9902,9915],{},[363,9865,9866,9870,9873],{},[380,9867,9868],{},[23,9869,7937],{},[380,9871,9872],{},"多轮对话保留最近 N 轮",[380,9874,9875],{},"早期上下文丢失",[363,9877,9878,9883,9886],{},[380,9879,9880],{},[23,9881,9882],{},"历史总结",[380,9884,9885],{},"把旧对话用 LLM 总结成几百 token",[380,9887,9888],{},"多一次 LLM 调用、细节丢失",[363,9890,9891,9896,9899],{},[380,9892,9893],{},[23,9894,9895],{},"工具输出截断",[380,9897,9898],{},"Agent 调工具返回大量数据时只保留摘要 + 引用 ID",[380,9900,9901],{},"模型可能再次请求完整数据",[363,9903,9904,9909,9912],{},[380,9905,9906],{},[23,9907,9908],{},"检索式压缩",[380,9910,9911],{},"旧消息存向量库，按需检索",[380,9913,9914],{},"工程复杂度高",[363,9916,9917,9922,9925],{},[380,9918,9919],{},[23,9920,9921],{},"Map-Reduce",[380,9923,9924],{},"长文档先并行小段总结、再合并",[380,9926,9927],{},"跨段语义可能被切断",[19,9929,9930],{},"Claude Code \u002F Cursor 长会话时都内置了自动总结——当上下文超过阈值，会把前面对话压缩成几百 token 的摘要保留。",[14,9932,9934],{"id":9933},"省-token-技巧","省 Token 技巧",[59,9936,9937,9943,9949,9955,9961,9970,9976,9982],{},[46,9938,9939,9942],{},[23,9940,9941],{},"精简 prompt"," — 删除冗余措辞，用简洁的指令",[46,9944,9945,9948],{},[23,9946,9947],{},"压缩历史"," — 长对话用摘要替代完整历史",[46,9950,9951,9954],{},[23,9952,9953],{},"缓存重复内容"," — 使用 prompt caching（Anthropic \u002F OpenAI \u002F Gemini \u002F DeepSeek 都支持）",[46,9956,9957,9960],{},[23,9958,9959],{},"选对模型"," — 简单任务用 Haiku \u002F Flash \u002F Mini 而非 Sonnet \u002F Pro \u002F GPT-5",[46,9962,9963,9966,9967],{},[23,9964,9965],{},"控制输出长度"," — 明确要求\"用 100 字以内回答\"或设 ",[110,9968,9969],{},"max_tokens",[46,9971,9972,9975],{},[23,9973,9974],{},"少用 Few-shot"," — 推理模型不需要太多示例",[46,9977,9978,9981],{},[23,9979,9980],{},"批量化"," — 同一类请求合并到 Batch API，价格直接 -50%",[46,9983,9984,9987],{},[23,9985,9986],{},"结构化输出"," — 让模型只输出 JSON 而不是\"自然语言 + JSON\"，Output 少一半",[14,9989,3355],{"id":3355},[95,9991,9993],{"id":9992},"误区-1-我用-gpt-比-claude-便宜因为单价低","误区 1: \"我用 GPT 比 Claude 便宜，因为单价低\"",[19,9995,9996,9997,10000],{},"不一定。GPT tokenizer 在中文场景下 token 数显著更多，单价低但总账不一定低。",[23,9998,9999],{},"算账要按\"完成同样任务的总成本\"","，不是单 token 价。",[95,10002,10004],{"id":10003},"误区-2-200k-上下文我就尽量塞满","误区 2: \"200K 上下文我就尽量塞满\"",[19,10006,10007,10008,10010],{},"参考 ",[31,10009,1473],{"href":1472},"——超过 80% 上下文窗口后模型质量明显下降。塞满还多花了钱，得不偿失。",[95,10012,10014],{"id":10013},"误区-3-output-反正不多没必要省","误区 3: \"Output 反正不多没必要省\"",[19,10016,10017],{},"Output 单价是 Input 的 3-5 倍。让模型回答\"以下是详细分析：...\"这类填充话，每次都在烧钱。",[14,10019,733],{"id":733},[43,10021,10022,10027,10032],{},[46,10023,10024,10025],{},"上下文窗口管理：",[31,10026,1473],{"href":1472},[46,10028,10029,10030],{},"采样参数（影响 Output 长度）：",[31,10031,4352],{"href":4351},[46,10033,10034,10035],{},"输出结构化（省 token）：",[31,10036,757],{"href":756},[760,10038,10039],{},"html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":108,"searchDepth":140,"depth":140,"links":10041},[10042,10043,10048,10049,10050,10054,10055,10056,10057,10058,10063],{"id":9194,"depth":123,"text":9195},{"id":9210,"depth":123,"text":9211,"children":10044},[10045,10046,10047],{"id":9214,"depth":140,"text":9215},{"id":9227,"depth":140,"text":9228},{"id":9289,"depth":140,"text":9290},{"id":9296,"depth":123,"text":9297},{"id":9391,"depth":123,"text":9392},{"id":9480,"depth":123,"text":9481,"children":10051},[10052,10053],{"id":9484,"depth":140,"text":9484},{"id":9505,"depth":140,"text":9505},{"id":9693,"depth":123,"text":9693},{"id":9784,"depth":123,"text":9784},{"id":9844,"depth":123,"text":9844},{"id":9933,"depth":123,"text":9934},{"id":3355,"depth":123,"text":3355,"children":10059},[10060,10061,10062],{"id":9992,"depth":140,"text":9993},{"id":10003,"depth":140,"text":10004},{"id":10013,"depth":140,"text":10014},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Ftoken",[1502,1503],{"title":9189,"description":108},"token","wiki\u002Ftoken","大模型处理文本的最小单位。一个 Token 约等于 0.75 个英文单词或 1-2 个汉字。Token 数量决定 API 费用和上下文窗口占用。",[2016,10072,10073],"基础概念","定价","gbHEyHh8pBEr1Saq2lobbZq4ogKbMzLTlLlaRX-Vvec",{"id":10076,"title":10077,"body":10078,"category":2057,"description":108,"extension":780,"meta":10724,"navigation":310,"path":10725,"published":783,"relatedModels":10726,"relatedTools":10727,"seo":10731,"slug":10732,"stem":10733,"summary":10734,"tags":10735,"updated":783,"__hash__":10738},"wiki\u002Fwiki\u002Fvibe-coding.md","Vibe Coding",{"type":11,"value":10079,"toc":10695},[10080,10084,10090,10093,10105,10108,10177,10181,10185,10191,10195,10198,10202,10208,10212,10216,10220,10276,10280,10283,10327,10331,10334,10413,10419,10423,10426,10432,10435,10438,10441,10464,10468,10470,10490,10493,10513,10516,10519,10551,10558,10560,10563,10569,10572,10575,10586,10589,10592,10612,10616,10619,10639,10642,10674,10676],[14,10081,10083],{"id":10082},"什么是-vibe-coding","什么是 Vibe Coding",[19,10085,10086,10087,347],{},"Vibe Coding 是 Andrej Karpathy 在 2025 年初提出的概念：",[23,10088,10089],{},"不再逐行写代码，而是用自然语言描述你想要什么，让 AI 生成代码，你只做审查和调整",[19,10091,10092],{},"\"Vibe\" 的意思是——跟着感觉走，用对话的方式编码，像跟一个全栈工程师 pair programming。",[86,10094,10095,10098],{},[19,10096,10097],{},"Karpathy 原帖大意：「我在 Vibe Coding 时，会忘记代码是存在的。我只看到屏幕上有东西在动，能用就行。出了 bug，我把报错粘给 AI，让它修，通常就修好了。我不再 review 它做的每一处修改。」",[19,10099,10100,10101,10104],{},"这段话有个常被忽略的关键前提——他做的是",[23,10102,10103],{},"周末玩具项目","。Karpathy 自己后来也澄清过，生产代码不应该这么干。",[14,10106,10107],{"id":10107},"与传统编程的区别",[357,10109,10110,10121],{},[360,10111,10112],{},[363,10113,10114,10116,10119],{},[366,10115,368],{},[366,10117,10118],{},"传统编程",[366,10120,10077],{},[375,10122,10123,10134,10145,10156,10167],{},[363,10124,10125,10128,10131],{},[380,10126,10127],{},"交互方式",[380,10129,10130],{},"键盘敲代码",[380,10132,10133],{},"自然语言对话",[363,10135,10136,10139,10142],{},[380,10137,10138],{},"开发者角色",[380,10140,10141],{},"写代码",[380,10143,10144],{},"审查代码、描述意图",[363,10146,10147,10150,10153],{},[380,10148,10149],{},"思考粒度",[380,10151,10152],{},"语法级（怎么写）",[380,10154,10155],{},"架构级（写什么）",[363,10157,10158,10161,10164],{},[380,10159,10160],{},"效率瓶颈",[380,10162,10163],{},"打字速度",[380,10165,10166],{},"提问精度",[363,10168,10169,10171,10174],{},[380,10170,1324],{},[380,10172,10173],{},"精确控制",[380,10175,10176],{},"快速原型、CRUD",[14,10178,10180],{"id":10179},"vibe-coding-工作流","Vibe Coding 工作流",[95,10182,10184],{"id":10183},"_1-描述意图","1. 描述意图",[103,10186,10189],{"className":10187,"code":10188,"language":220},[218],"帮我做一个 TODO 应用：\n- 添加\u002F删除\u002F标记完成\n- 本地存储\n- 暗色主题\n- 响应式布局\n",[110,10190,10188],{"__ignoreMap":108},[95,10192,10194],{"id":10193},"_2-ai-生成代码","2. AI 生成代码",[19,10196,10197],{},"AI 一次性或分步生成完整代码。",[95,10199,10201],{"id":10200},"_3-审查与调整","3. 审查与调整",[103,10203,10206],{"className":10204,"code":10205,"language":220},[218],"布局不错，但列表项太挤了，增加 padding。\n删除按钮加个确认弹窗。\n",[110,10207,10205],{"__ignoreMap":108},[95,10209,10211],{"id":10210},"_4-迭代直到满意","4. 迭代直到满意",[14,10213,10215],{"id":10214},"适合-vibe-coding-的工具","适合 Vibe Coding 的工具",[95,10217,10219],{"id":10218},"ai-ide","AI IDE",[357,10221,10222,10233],{},[360,10223,10224],{},[363,10225,10226,10228,10230],{},[366,10227,1038],{},[366,10229,1215],{},[366,10231,10232],{},"Vibe Coding 适配度",[375,10234,10235,10245,10256,10265],{},[363,10236,10237,10239,10242],{},[380,10238,6885],{},[380,10240,10241],{},"Composer 多文件改写",[380,10243,10244],{},"★★★★★",[363,10246,10247,10250,10253],{},[380,10248,10249],{},"Trae",[380,10251,10252],{},"字节出品，国内直连",[380,10254,10255],{},"★★★★☆",[363,10257,10258,10260,10263],{},[380,10259,6891],{},[380,10261,10262],{},"Cascade 模式",[380,10264,10255],{},[363,10266,10267,10270,10273],{},[380,10268,10269],{},"Qoder",[380,10271,10272],{},"阿里出品",[380,10274,10275],{},"★★★☆☆",[95,10277,10279],{"id":10278},"ai-应用生成器","AI 应用生成器",[19,10281,10282],{},"这类工具更进一步——不需要 IDE，直接在浏览器里对话生成完整应用：",[357,10284,10285,10293],{},[360,10286,10287],{},[363,10288,10289,10291],{},[366,10290,1038],{},[366,10292,1215],{},[375,10294,10295,10303,10311,10319],{},[363,10296,10297,10300],{},[380,10298,10299],{},"Bolt.new",[380,10301,10302],{},"一句话生成全栈应用，浏览器内运行",[363,10304,10305,10308],{},[380,10306,10307],{},"Lovable",[380,10309,10310],{},"AI 全栈开发，内置部署",[363,10312,10313,10316],{},[380,10314,10315],{},"v0",[380,10317,10318],{},"Vercel 出品，专注前端 UI 生成",[363,10320,10321,10324],{},[380,10322,10323],{},"Replit Agent",[380,10325,10326],{},"云端全流程：编码→测试→部署",[14,10328,10330],{"id":10329},"vibe-coding-vs-spec-driven-development","Vibe Coding vs Spec-Driven Development",[19,10332,10333],{},"2026 年另一种主流流派——Spec-Driven Development（规格驱动开发，如 GitHub Spec Kit）走的是相反路线：",[357,10335,10336,10347],{},[360,10337,10338],{},[363,10339,10340,10342,10344],{},[366,10341,368],{},[366,10343,10077],{},[366,10345,10346],{},"Spec-Driven",[375,10348,10349,10360,10371,10382,10393,10402],{},[363,10350,10351,10354,10357],{},[380,10352,10353],{},"起点",[380,10355,10356],{},"一句话目标",[380,10358,10359],{},"详细 spec \u002F 验收标准",[363,10361,10362,10365,10368],{},[380,10363,10364],{},"AI 自由度",[380,10366,10367],{},"高",[380,10369,10370],{},"受 spec 严格约束",[363,10372,10373,10376,10379],{},[380,10374,10375],{},"适合阶段",[380,10377,10378],{},"探索、原型",[380,10380,10381],{},"生产、团队协作",[363,10383,10384,10387,10390],{},[380,10385,10386],{},"出错成本",[380,10388,10389],{},"改了再改",[380,10391,10392],{},"改 spec → 重新生成",[363,10394,10395,10398,10400],{},[380,10396,10397],{},"可审计性",[380,10399,4826],{},[380,10401,10367],{},[363,10403,10404,10407,10410],{},[380,10405,10406],{},"代表工具",[380,10408,10409],{},"Cursor \u002F Bolt",[380,10411,10412],{},"GitHub Spec Kit \u002F Claude Code + AGENTS.md",[19,10414,10415,10418],{},[23,10416,10417],{},"实务中两者结合","：探索期 Vibe Coding 快速验证，进入生产前补 spec \u002F 测试 \u002F AGENTS.md 把它\"固化\"成可维护的代码。",[14,10420,10422],{"id":10421},"vibe-hardened-流程从玩具到生产","Vibe → Hardened 流程：从玩具到生产",[19,10424,10425],{},"Vibe Coding 出来的代码上线前必须经过\"加固\"流程：",[103,10427,10430],{"className":10428,"code":10429,"language":220},[218],"[Vibe 阶段]                    [Hardened 阶段]\n对话生成                  →    补单元测试 + 集成测试\n功能能跑                  →    Code Review（人 + AI 双重）\n能用就行                  →    边界条件、错误处理补全\n本地 SQLite               →    生产 DB + 迁移脚本\n密钥写死                  →    走 env \u002F secret 管理\nconsole.log 满天          →    结构化日志 + 监控\n没有 CI                   →    Lint \u002F 类型检查 \u002F 测试 \u002F 部署 pipeline\n没有文档                  →    README + AGENTS.md \u002F CLAUDE.md\n",[110,10431,10429],{"__ignoreMap":108},[19,10433,10434],{},"跳过 Hardened 直接上线，是 vibe coding 翻车最常见原因。",[14,10436,10437],{"id":10437},"版本控制策略",[19,10439,10440],{},"Vibe Coding 容易让 AI 一次改一大片，下面三招防止 commit 爆炸：",[59,10442,10443,10449,10455],{},[46,10444,10445,10448],{},[23,10446,10447],{},"小步提交","——每完成一个功能就 commit，不要\"全部改完再统一提\"。AI 一次性回滚单个 commit 比从乱了的工作区里挑改动容易得多。",[46,10450,10451,10454],{},[23,10452,10453],{},"分支隔离实验","——任何\"AI，帮我试一下用别的方案\"先开新分支，跑通再合，跑废就删。别在主分支直接让它改。",[46,10456,10457,558,10460,10463],{},[23,10458,10459],{},"diff 必看",[110,10461,10462],{},"git diff"," 即使你 vibe，也要扫一眼变了多少行、动了哪些文件。AI 偶尔会\"顺手\"重写无关代码，不看 diff 就放过去。",[14,10465,10467],{"id":10466},"vibe-coding-的边界","Vibe Coding 的边界",[95,10469,2438],{"id":2438},[43,10471,10472,10475,10478,10481,10484,10487],{},[46,10473,10474],{},"✅ CRUD 应用、管理后台",[46,10476,10477],{},"✅ 落地页、营销网站",[46,10479,10480],{},"✅ 原型验证、MVP 开发",[46,10482,10483],{},"✅ 数据处理脚本、一次性自动化",[46,10485,10486],{},"✅ 单页应用（React\u002FVue）",[46,10488,10489],{},"✅ 个人玩具项目（Karpathy 本意）",[95,10491,10492],{"id":10492},"不适合",[43,10494,10495,10498,10501,10504,10507,10510],{},[46,10496,10497],{},"❌ 高并发系统（需要精细性能优化）",[46,10499,10500],{},"❌ 底层系统编程（OS\u002F驱动\u002F编译器）",[46,10502,10503],{},"❌ 安全关键系统（医疗\u002F航空\u002F金融核心）",[46,10505,10506],{},"❌ 大型已有项目重构（上下文太长）",[46,10508,10509],{},"❌ 需要精确算法实现（不如手写可靠）",[46,10511,10512],{},"❌ 多人协作的生产代码（无 spec 难协同）",[14,10514,10515],{"id":10515},"真实失败案例",[19,10517,10518],{},"社区流传的几个翻车故事，可以反向学习：",[59,10520,10521,10527,10533,10539,10545],{},[46,10522,10523,10526],{},[23,10524,10525],{},"「能跑就行」结果跑不动","——开发者 vibe 出一个 SaaS 上线，没看 AI 生成的数据库查询逻辑。前 100 个用户没事，第 1000 个用户时发现每个请求都在做全表扫描，数据库 CPU 100%。",[46,10528,10529,10532],{},[23,10530,10531],{},"密钥泄露","——AI 生成的代码里直接硬编码 OpenAI API key，开发者没看就 push 到公开 repo，几小时被薅几千刀。",[46,10534,10535,10538],{},[23,10536,10537],{},"AI 删了文件没提","——让 AI\"重构一下\"，它顺手删了一个看似无用的 utils 文件，实际上有别处依赖。本地跑通是因为缓存，部署后崩。",[46,10540,10541,10544],{},[23,10542,10543],{},"\"测试也帮我加上\"——AI 写了测试，但测的是\"实现是否调用了 mock\"而非\"业务是否正确\"","。100% 覆盖率，0 个 bug 被发现。",[46,10546,10547,10550],{},[23,10548,10549],{},"prompt injection 没防","——AI 写的客服 bot 直接把用户输入拼到 system prompt，第一周就被用户绕过限制泄露内部数据。",[19,10552,10553,10554,10557],{},"共性：",[23,10555,10556],{},"没有审查 = 没有质量门","。Vibe Coding 把\"写代码的速度\"加快了 10 倍，如果不同时把\"审查 + 测试\"也加快 10 倍，bug 数也会跟着翻 10 倍。",[14,10559,1403],{"id":1403},[95,10561,10562],{"id":10562},"提示词技巧",[103,10564,10567],{"className":10565,"code":10566,"language":220},[218],"❌ \"做个网站\"\n✅ \"用 Next.js 14 App Router 做一个 SaaS 落地页：\n    - Hero 区：标题 + 副标题 + CTA 按钮\n    - Features 区：3 列图标 + 文字\n    - Pricing 区：3 档价格卡片\n    - Footer 区：链接 + 版权\n    - 用 Tailwind CSS，暗色主题\n    - 响应式，移动端优先\"\n",[110,10568,10566],{"__ignoreMap":108},[95,10570,10571],{"id":10571},"分步迭代",[19,10573,10574],{},"不要一次性要求 AI 做完所有事。分步迭代：",[59,10576,10577,10580,10583],{},[46,10578,10579],{},"先生成骨架（布局 + 路由）",[46,10581,10582],{},"再填充内容（文案 + 样式）",[46,10584,10585],{},"最后加交互（状态管理 + API）",[95,10587,10588],{"id":10588},"保持审查",[19,10590,10591],{},"Vibe Coding 不等于不看代码。你必须：",[43,10593,10594,10597,10600,10603,10606],{},[46,10595,10596],{},"理解 AI 生成的每一行代码",[46,10598,10599],{},"检查安全漏洞（XSS、SQL 注入、密钥泄露）",[46,10601,10602],{},"验证业务逻辑正确性",[46,10604,10605],{},"测试边界情况",[46,10607,10608,10609,10611],{},"看 ",[110,10610,10462],{}," 而不是只看 AI 的总结",[14,10613,10615],{"id":10614},"vibe-coding-会取代程序员吗","Vibe Coding 会取代程序员吗",[19,10617,10618],{},"短答案：不会，但会改变程序员的工作方式。",[43,10620,10621,10627,10633],{},[46,10622,10623,10626],{},[23,10624,10625],{},"初级程序员"," — 从\"写代码\"变成\"审代码\"，门槛降低",[46,10628,10629,10632],{},[23,10630,10631],{},"高级程序员"," — 效率 10x，一个人能做原来一个团队的活",[46,10634,10635,10638],{},[23,10636,10637],{},"非程序员"," — 能做出简单应用，但复杂系统仍需专业开发者",[19,10640,10641],{},"未来程序员的核心竞争力不是\"会写代码\"，而是：",[59,10643,10644,10650,10656,10662,10668],{},[46,10645,10646,10649],{},[23,10647,10648],{},"系统设计能力"," — 知道该做什么、怎么拆分",[46,10651,10652,10655],{},[23,10653,10654],{},"代码审查能力"," — 能判断 AI 生成代码的质量",[46,10657,10658,10661],{},[23,10659,10660],{},"调试能力"," — AI 出错时能定位和修复",[46,10663,10664,10667],{},[23,10665,10666],{},"领域知识"," — 理解业务需求，翻译为技术方案",[46,10669,10670,10673],{},[23,10671,10672],{},"测试能力"," — 知道怎么验证 AI 的输出真的是对的",[14,10675,733],{"id":733},[43,10677,10678,10685,10690],{},[46,10679,10680,10681,562,10683],{},"配套思路：",[31,10682,1934],{"href":2008},[31,10684,1473],{"href":1472},[46,10686,10687,10688],{},"自主性更高的选项：",[31,10689,749],{"href":748},[46,10691,10692,10693],{},"工具底层机制：",[31,10694,757],{"href":756},{"title":108,"searchDepth":140,"depth":140,"links":10696},[10697,10698,10699,10705,10709,10710,10711,10712,10716,10717,10722,10723],{"id":10082,"depth":123,"text":10083},{"id":10107,"depth":123,"text":10107},{"id":10179,"depth":123,"text":10180,"children":10700},[10701,10702,10703,10704],{"id":10183,"depth":140,"text":10184},{"id":10193,"depth":140,"text":10194},{"id":10200,"depth":140,"text":10201},{"id":10210,"depth":140,"text":10211},{"id":10214,"depth":123,"text":10215,"children":10706},[10707,10708],{"id":10218,"depth":140,"text":10219},{"id":10278,"depth":140,"text":10279},{"id":10329,"depth":123,"text":10330},{"id":10421,"depth":123,"text":10422},{"id":10437,"depth":123,"text":10437},{"id":10466,"depth":123,"text":10467,"children":10713},[10714,10715],{"id":2438,"depth":140,"text":2438},{"id":10492,"depth":140,"text":10492},{"id":10515,"depth":123,"text":10515},{"id":1403,"depth":123,"text":1403,"children":10718},[10719,10720,10721],{"id":10562,"depth":140,"text":10562},{"id":10571,"depth":140,"text":10571},{"id":10588,"depth":140,"text":10588},{"id":10614,"depth":123,"text":10615},{"id":733,"depth":123,"text":733},{},"\u002Fwiki\u002Fvibe-coding",[1502,1503],[2062,10728,10729,10730],"coding\u002Fide\u002Ftrae","coding\u002Fbuilder\u002Fbolt-new","coding\u002Fbuilder\u002Flovable",{"title":10077,"description":108},"vibe-coding","wiki\u002Fvibe-coding","用自然语言与 AI 对话式编程，开发者描述意图、AI 写代码，像指挥一个全栈工程师干活。",[10077,10736,10737,6885],"编程方法论","AI编程","9UckVqIxysE5n7rliIiWL7i_6LVgt6hJrhow9jgY9Bc",{"tools":4,"reviews":5,"playbooks":284,"news":272},1782316491304]