[{"data":1,"prerenderedAt":13017},["ShallowReactive",2],{"header-counts":3,"models-list":6,"footer-counts":13016},{"tools":4,"reviews":5},65,7,[7,941,1730,2748,3723,4550,5425,6210,7243,8221,9117,10095,10970,11965],{"id":8,"title":9,"apiCompatible":10,"benchmarks":12,"body":22,"category":907,"contextWindow":908,"description":909,"extension":910,"maxOutput":911,"meta":912,"navigation":177,"path":913,"pricing":914,"published":915,"relatedTools":916,"releaseDate":919,"seo":920,"slug":921,"stem":922,"strengths":923,"updated":915,"useCases":929,"vendor":934,"vendorEn":934,"weaknesses":935,"__hash__":940},"models\u002Fmodels\u002Fclaude-haiku-4.md","Claude Haiku 4",[11],"anthropic",[13,16,19],{"name":14,"score":15},"SWE-bench Verified","56.1%",{"name":17,"score":18},"HumanEval","87.1%",{"name":20,"score":21},"MMLU","83.4%",{"type":23,"value":24,"toc":888},"minimark",[25,29,33,36,40,43,46,102,105,108,112,115,119,127,131,306,310,313,487,490,493,564,567,583,587,590,604,607,611,693,700,703,782,786,789,806,809,823,826,852,855,884],[26,27,28],"h2",{"id":28},"概述",[30,31,32],"p",{},"Claude Haiku 4 是 Anthropic 于 2025 年 5 月与 Sonnet 4 \u002F Opus 4 同步发布的轻量模型。定位为\"快速 + 低成本\"选项，速度是 Sonnet 4 的 3 倍，价格仅 1\u002F3。",[26,34,35],{"id":35},"核心能力",[37,38,39],"h3",{"id":39},"极速响应",[30,41,42],{},"Haiku 4 的首 token 延迟约 0.5 秒，是 Sonnet 4 的 1\u002F3。对于实时代码补全、流式聊天等场景，这个速度差异至关重要。",[30,44,45],{},"实测吞吐：",[47,48,49,65],"table",{},[50,51,52],"thead",{},[53,54,55,59,62],"tr",{},[56,57,58],"th",{},"场景",[56,60,61],{},"Haiku 4",[56,63,64],{},"Sonnet 4",[66,67,68,80,91],"tbody",{},[53,69,70,74,77],{},[71,72,73],"td",{},"首 token 延迟",[71,75,76],{},"~0.5s",[71,78,79],{},"~1.5s",[53,81,82,85,88],{},[71,83,84],{},"流式吞吐",[71,86,87],{},"~80 tok\u002Fs",[71,89,90],{},"~50 tok\u002Fs",[53,92,93,96,99],{},[71,94,95],{},"1K 字回答总时长",[71,97,98],{},"~2s",[71,100,101],{},"~6s",[37,103,104],{"id":104},"编程能力不打折",[30,106,107],{},"SWE-bench Verified 56.1%，远超同价位的 GPT-4o-mini（33.2%）和 Gemini Flash（43.8%）。在代码补全场景下，Haiku 4 的体验接近 Sonnet 4 的 80%。",[37,109,111],{"id":110},"_200k-上下文","200K 上下文",[30,113,114],{},"与 Sonnet 4 共享 200K 上下文窗口。可以用低成本处理长文档、全文件分析等任务。",[37,116,118],{"id":117},"prompt-cache-仍然支持","Prompt Cache 仍然支持",[30,120,121,122,126],{},"Haiku 4 同样支持 prompt cache，Cache Read 价格只有 $0.10\u002FM——便宜到几乎可以忽略。",[123,124,125],"strong",{},"高频固定 prompt 场景务必开启","。",[26,128,130],{"id":129},"api-调用示例","API 调用示例",[132,133,138],"pre",{"className":134,"code":135,"language":136,"meta":137,"style":137},"language-python shiki shiki-themes github-light github-dark","from anthropic import Anthropic\nclient = Anthropic()\n\n# 极致速度场景：流式 + 短 max_tokens\nwith client.messages.stream(\n    model=\"claude-haiku-4-20250522\",\n    max_tokens=500,           # 不要默认 16K，限制输出长度提速\n    temperature=0,\n    messages=[{\"role\": \"user\", \"content\": prompt}],\n) as stream:\n    for text in stream.text_stream:\n        yield text\n","python","",[139,140,141,160,172,179,186,195,211,228,241,270,282,297],"code",{"__ignoreMap":137},[142,143,146,150,154,157],"span",{"class":144,"line":145},"line",1,[142,147,149],{"class":148},"szBVR","from",[142,151,153],{"class":152},"sVt8B"," anthropic ",[142,155,156],{"class":148},"import",[142,158,159],{"class":152}," Anthropic\n",[142,161,163,166,169],{"class":144,"line":162},2,[142,164,165],{"class":152},"client ",[142,167,168],{"class":148},"=",[142,170,171],{"class":152}," Anthropic()\n",[142,173,175],{"class":144,"line":174},3,[142,176,178],{"emptyLinePlaceholder":177},true,"\n",[142,180,182],{"class":144,"line":181},4,[142,183,185],{"class":184},"sJ8bj","# 极致速度场景：流式 + 短 max_tokens\n",[142,187,189,192],{"class":144,"line":188},5,[142,190,191],{"class":148},"with",[142,193,194],{"class":152}," client.messages.stream(\n",[142,196,198,202,204,208],{"class":144,"line":197},6,[142,199,201],{"class":200},"s4XuR","    model",[142,203,168],{"class":148},[142,205,207],{"class":206},"sZZnC","\"claude-haiku-4-20250522\"",[142,209,210],{"class":152},",\n",[142,212,213,216,218,222,225],{"class":144,"line":5},[142,214,215],{"class":200},"    max_tokens",[142,217,168],{"class":148},[142,219,221],{"class":220},"sj4cs","500",[142,223,224],{"class":152},",           ",[142,226,227],{"class":184},"# 不要默认 16K，限制输出长度提速\n",[142,229,231,234,236,239],{"class":144,"line":230},8,[142,232,233],{"class":200},"    temperature",[142,235,168],{"class":148},[142,237,238],{"class":220},"0",[142,240,210],{"class":152},[142,242,244,247,249,252,255,258,261,264,267],{"class":144,"line":243},9,[142,245,246],{"class":200},"    messages",[142,248,168],{"class":148},[142,250,251],{"class":152},"[{",[142,253,254],{"class":206},"\"role\"",[142,256,257],{"class":152},": ",[142,259,260],{"class":206},"\"user\"",[142,262,263],{"class":152},", ",[142,265,266],{"class":206},"\"content\"",[142,268,269],{"class":152},": prompt}],\n",[142,271,273,276,279],{"class":144,"line":272},10,[142,274,275],{"class":152},") ",[142,277,278],{"class":148},"as",[142,280,281],{"class":152}," stream:\n",[142,283,285,288,291,294],{"class":144,"line":284},11,[142,286,287],{"class":148},"    for",[142,289,290],{"class":152}," text ",[142,292,293],{"class":148},"in",[142,295,296],{"class":152}," stream.text_stream:\n",[142,298,300,303],{"class":144,"line":299},12,[142,301,302],{"class":148},"        yield",[142,304,305],{"class":152}," text\n",[37,307,309],{"id":308},"批量处理batch-api","批量处理（Batch API）",[30,311,312],{},"Haiku 4 配合 Anthropic Batch API，价格再 -50%（变成 Input $0.5\u002FM · Output $2.5\u002FM），24 小时内返回。适合：",[132,314,316],{"className":134,"code":315,"language":136,"meta":137,"style":137},"# 提交 10000 条要分类的文本\nbatch = client.messages.batches.create(\n    requests=[\n        {\n            \"custom_id\": f\"task-{i}\",\n            \"params\": {\n                \"model\": \"claude-haiku-4-20250522\",\n                \"max_tokens\": 100,\n                \"messages\": [{\"role\": \"user\", \"content\": f\"分类：{text}\"}],\n            }\n        }\n        for i, text in enumerate(texts)\n    ]\n)\n# 轮询 batch.id 直到 status='ended'\n",[139,317,318,323,333,343,348,375,383,394,406,443,448,453,469,475,481],{"__ignoreMap":137},[142,319,320],{"class":144,"line":145},[142,321,322],{"class":184},"# 提交 10000 条要分类的文本\n",[142,324,325,328,330],{"class":144,"line":162},[142,326,327],{"class":152},"batch ",[142,329,168],{"class":148},[142,331,332],{"class":152}," client.messages.batches.create(\n",[142,334,335,338,340],{"class":144,"line":174},[142,336,337],{"class":200},"    requests",[142,339,168],{"class":148},[142,341,342],{"class":152},"[\n",[142,344,345],{"class":144,"line":181},[142,346,347],{"class":152},"        {\n",[142,349,350,353,355,358,361,364,367,370,373],{"class":144,"line":188},[142,351,352],{"class":206},"            \"custom_id\"",[142,354,257],{"class":152},[142,356,357],{"class":148},"f",[142,359,360],{"class":206},"\"task-",[142,362,363],{"class":220},"{",[142,365,366],{"class":152},"i",[142,368,369],{"class":220},"}",[142,371,372],{"class":206},"\"",[142,374,210],{"class":152},[142,376,377,380],{"class":144,"line":197},[142,378,379],{"class":206},"            \"params\"",[142,381,382],{"class":152},": {\n",[142,384,385,388,390,392],{"class":144,"line":5},[142,386,387],{"class":206},"                \"model\"",[142,389,257],{"class":152},[142,391,207],{"class":206},[142,393,210],{"class":152},[142,395,396,399,401,404],{"class":144,"line":230},[142,397,398],{"class":206},"                \"max_tokens\"",[142,400,257],{"class":152},[142,402,403],{"class":220},"100",[142,405,210],{"class":152},[142,407,408,411,414,416,418,420,422,424,426,428,431,433,436,438,440],{"class":144,"line":243},[142,409,410],{"class":206},"                \"messages\"",[142,412,413],{"class":152},": [{",[142,415,254],{"class":206},[142,417,257],{"class":152},[142,419,260],{"class":206},[142,421,263],{"class":152},[142,423,266],{"class":206},[142,425,257],{"class":152},[142,427,357],{"class":148},[142,429,430],{"class":206},"\"分类：",[142,432,363],{"class":220},[142,434,435],{"class":152},"text",[142,437,369],{"class":220},[142,439,372],{"class":206},[142,441,442],{"class":152},"}],\n",[142,444,445],{"class":144,"line":272},[142,446,447],{"class":152},"            }\n",[142,449,450],{"class":144,"line":284},[142,451,452],{"class":152},"        }\n",[142,454,455,458,461,463,466],{"class":144,"line":299},[142,456,457],{"class":148},"        for",[142,459,460],{"class":152}," i, text ",[142,462,293],{"class":148},[142,464,465],{"class":220}," enumerate",[142,467,468],{"class":152},"(texts)\n",[142,470,472],{"class":144,"line":471},13,[142,473,474],{"class":152},"    ]\n",[142,476,478],{"class":144,"line":477},14,[142,479,480],{"class":152},")\n",[142,482,484],{"class":144,"line":483},15,[142,485,486],{"class":184},"# 轮询 batch.id 直到 status='ended'\n",[30,488,489],{},"万级任务用 Batch 一晚就出，成本是单条同步调用的 1\u002F2。",[26,491,492],{"id":492},"定价",[47,494,495,509],{},[50,496,497],{},[53,498,499,502,504,506],{},[56,500,501],{},"项目",[56,503,61],{},[56,505,64],{},[56,507,508],{},"倍数",[66,510,511,525,538,551],{},[53,512,513,516,519,522],{},[71,514,515],{},"Input",[71,517,518],{},"$1\u002FM",[71,520,521],{},"$3\u002FM",[71,523,524],{},"3×",[53,526,527,530,533,536],{},[71,528,529],{},"Output",[71,531,532],{},"$5\u002FM",[71,534,535],{},"$15\u002FM",[71,537,524],{},[53,539,540,543,546,549],{},[71,541,542],{},"Cache Read",[71,544,545],{},"$0.10\u002FM",[71,547,548],{},"$0.30\u002FM",[71,550,524],{},[53,552,553,556,559,562],{},[71,554,555],{},"Batch（-50%）",[71,557,558],{},"$0.5\u002F$2.5",[71,560,561],{},"$1.5\u002F$7.5",[71,563,524],{},[30,565,566],{},"一个月用 10 亿 token（Input\u002FOutput 各半）：",[568,569,570,574,577,580],"ul",{},[571,572,573],"li",{},"Sonnet 4: $9,000",[571,575,576],{},"Haiku 4: $3,000",[571,578,579],{},"Haiku 4 + 50% cache: $1,650",[571,581,582],{},"Haiku 4 + Batch: $1,500",[26,584,586],{"id":585},"在-cursor-ide-中的角色","在 Cursor \u002F IDE 中的角色",[30,588,589],{},"Cursor \u002F Windsurf 等 IDE 内部通常分两档模型：",[568,591,592,598],{},[571,593,594,597],{},[123,595,596],{},"Tab 自动补全 \u002F Cmd+K 小改动"," → 用 Haiku 4 这类速度优先模型",[571,599,600,603],{},[123,601,602],{},"Composer 多文件改写 \u002F Agent"," → 用 Sonnet 4 这类质量优先模型",[30,605,606],{},"如果你自建 IDE 集成，参考这个分层。",[26,608,610],{"id":609},"haiku-4-vs-同价位竞品","Haiku 4 vs 同价位竞品",[47,612,613,630],{},[50,614,615],{},[53,616,617,620,622,624,627],{},[56,618,619],{},"模型",[56,621,515],{},[56,623,529],{},[56,625,626],{},"SWE-bench",[56,628,629],{},"速度",[66,631,632,645,661,676],{},[53,633,634,636,638,640,642],{},[71,635,9],{},[71,637,518],{},[71,639,532],{},[71,641,15],{},[71,643,644],{},"★★★★★",[53,646,647,650,653,656,659],{},[71,648,649],{},"GPT-4o-mini",[71,651,652],{},"$0.15\u002FM",[71,654,655],{},"$0.60\u002FM",[71,657,658],{},"33.2%",[71,660,644],{},[53,662,663,666,669,671,674],{},[71,664,665],{},"Gemini 2.5 Flash",[71,667,668],{},"$0.075\u002FM",[71,670,548],{},[71,672,673],{},"43.8%",[71,675,644],{},[53,677,678,681,684,687,690],{},[71,679,680],{},"DeepSeek-V3",[71,682,683],{},"¥1\u002FM",[71,685,686],{},"¥2\u002FM",[71,688,689],{},"61.2%",[71,691,692],{},"★★★★☆",[30,694,695,696,699],{},"GPT-4o-mini \u002F Gemini Flash 便宜 5-10 倍，但 SWE-bench 差一大截。",[123,697,698],{},"Haiku 4 是\"轻量级里编程最强\"","，DeepSeek-V3 是\"国内便宜里编程最强\"。",[26,701,702],{"id":702},"三档模型怎么选",[47,704,705,717],{},[50,706,707],{},[53,708,709,711,714],{},[56,710,58],{},[56,712,713],{},"推荐",[56,715,716],{},"理由",[66,718,719,729,739,750,761,771],{},[53,720,721,724,726],{},[71,722,723],{},"代码补全\u002F实时建议",[71,725,61],{},[71,727,728],{},"速度优先",[53,730,731,734,736],{},[71,732,733],{},"主力编程\u002F代码审查",[71,735,64],{},[71,737,738],{},"质量优先",[53,740,741,744,747],{},[71,742,743],{},"深度推理\u002F长文写作",[71,745,746],{},"Opus 4",[71,748,749],{},"能力优先",[53,751,752,755,758],{},[71,753,754],{},"批量处理 10 万条数据",[71,756,757],{},"Haiku 4 + Batch",[71,759,760],{},"成本优先",[53,762,763,766,768],{},[71,764,765],{},"Agent 多步工具调用",[71,767,64],{},[71,769,770],{},"稳定性优先",[53,772,773,776,779],{},[71,774,775],{},"国内项目预算敏感",[71,777,778],{},"GLM-5.2 \u002F DeepSeek-V3",[71,780,781],{},"直连且更便宜",[26,783,785],{"id":784},"适用-不适用清单","适用 \u002F 不适用清单",[30,787,788],{},"✅ 适合：",[568,790,791,794,797,800,803],{},[571,792,793],{},"代码自动补全（Cursor Tab \u002F Copilot 风格）",[571,795,796],{},"实时聊天机器人 \u002F 客服",[571,798,799],{},"大批量分类 \u002F 抽取 \u002F 摘要",[571,801,802],{},"工具调用前置的简单 router 模型",[571,804,805],{},"日志解析 \u002F 语义提取",[30,807,808],{},"❌ 不适合：",[568,810,811,814,817,820],{},[571,812,813],{},"复杂多步推理（用 Sonnet 4 \u002F Opus 4）",[571,815,816],{},"多文件代码重构（用 Sonnet 4）",[571,818,819],{},"长文写作（16K 输出窗口不够）",[571,821,822],{},"法律 \u002F 医疗 \u002F 金融的严格场景（用 Opus 4）",[26,824,825],{"id":825},"避坑清单",[568,827,828,834,840,846],{},[571,829,830,833],{},[123,831,832],{},"别忘 max_tokens","：不显式设的话默认值很大，浪费速度和钱。",[571,835,836,839],{},[123,837,838],{},"16K 输出限制","：长文档处理时让模型分段输出。",[571,841,842,845],{},[123,843,844],{},"Cache 也要开","：哪怕 Haiku 已经很便宜，固定 prompt 部分 cache 后又能再省 50%。",[571,847,848,851],{},[123,849,850],{},"复杂任务不要硬凑","：模型选错宁可重路由也别让 Haiku 4 硬扛——你省的钱会在用户体验上还回去。",[26,853,854],{"id":854},"延伸阅读",[568,856,857,870,877],{},[571,858,859,860,865,866],{},"同系兄弟：",[861,862,864],"a",{"href":863},"\u002Fmodels\u002Fclaude-sonnet-4.html","Claude Sonnet 4"," \u002F ",[861,867,869],{"href":868},"\u002Fmodels\u002Fclaude-opus-4.html","Claude Opus 4",[571,871,872,873],{},"省钱原理：",[861,874,876],{"href":875},"\u002Fwiki\u002Ftoken.html","Token",[571,878,879,880],{},"速度优化：",[861,881,883],{"href":882},"\u002Fwiki\u002Fcontext-engineering.html","Context Engineering",[885,886,887],"style",{},"html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":137,"searchDepth":174,"depth":174,"links":889},[890,891,897,900,901,902,903,904,905,906],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":892},[893,894,895,896],{"id":39,"depth":174,"text":39},{"id":104,"depth":174,"text":104},{"id":110,"depth":174,"text":111},{"id":117,"depth":174,"text":118},{"id":129,"depth":162,"text":130,"children":898},[899],{"id":308,"depth":174,"text":309},{"id":492,"depth":162,"text":492},{"id":585,"depth":162,"text":586},{"id":609,"depth":162,"text":610},{"id":702,"depth":162,"text":702},{"id":784,"depth":162,"text":785},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"llm",200000,"Anthropic Claude Haiku 4 轻量快速模型，价格为 Sonnet 4 的 1\u002F3、推理速度约 3 倍，工具调用与指令跟随保持 Claude 家族水准，适合代码补全、IDE 实时辅助、批量处理与高并发 Agent 场景。","md",16384,{},"\u002Fmodels\u002Fclaude-haiku-4","Input $1\u002FM · Output $5\u002FM","2026-06-21",[917,918],"coding\u002Fide\u002Fcursor","coding\u002Fcli\u002Fclaude-code","2025-05-22",{"title":9,"description":909},"claude-haiku-4","models\u002Fclaude-haiku-4",[924,925,926,927,928],"速度极快，3 倍于 Sonnet 4","价格仅为 Sonnet 4 的 1\u002F3","200K 上下文，与 Sonnet 4 一致","编程能力远超同价位竞品","适合高并发、低延迟场景",[930,931,932,933],"代码补全（实时建议）","高并发客服 Bot","批量文本处理","轻量 Agent 任务","Anthropic",[936,937,938,939],"推理能力不如 Sonnet 4 \u002F Opus 4","复杂编程任务不如 Sonnet 4 稳定","16K 输出窗口偏短","国内无官方 API","-CLLhOK8SfSERewWRln2xOJCJJ7chSXi4gXInERf4pU",{"id":942,"title":869,"apiCompatible":943,"benchmarks":944,"body":953,"category":1704,"contextWindow":908,"description":1705,"extension":910,"maxOutput":1706,"meta":1707,"navigation":177,"path":1708,"pricing":1709,"published":915,"relatedTools":1710,"releaseDate":919,"seo":1711,"slug":1712,"stem":1713,"strengths":1714,"updated":915,"useCases":1720,"vendor":934,"vendorEn":934,"weaknesses":1725,"__hash__":1729},"models\u002Fmodels\u002Fclaude-opus-4.md",[11],[945,947,949,951],{"name":14,"score":946},"70.1%",{"name":17,"score":948},"93.2%",{"name":20,"score":950},"90.1%",{"name":952,"score":689},"GPQA Diamond",{"type":23,"value":954,"toc":1687},[955,957,960,962,965,968,982,986,998,1169,1172,1175,1178,1192,1195,1198,1200,1339,1350,1352,1406,1409,1412,1415,1439,1443,1526,1532,1535,1542,1546,1625,1632,1634,1660,1662,1684],[26,956,28],{"id":28},[30,958,959],{},"Claude Opus 4 是 Anthropic 于 2025 年 5 月与 Sonnet 4 同步发布的旗舰推理模型。推理能力和写作质量在所有大模型中名列前茅，但价格是 Sonnet 4 的 5 倍，定位为高价值场景专用。",[26,961,35],{"id":35},[37,963,964],{"id":964},"深度推理",[30,966,967],{},"GPQA Diamond 61.2%，在科学推理、逻辑分析、数学证明等复杂推理任务上优于 Sonnet 4。适合需要深度思考的场景：",[568,969,970,973,976,979],{},[571,971,972],{},"法律条文分析与跨条文关联",[571,974,975],{},"金融模型推导与场景压力测试",[571,977,978],{},"科学论文审阅与方法论评估",[571,980,981],{},"复杂决策树推理与边界条件枚举",[37,983,985],{"id":984},"thinking-模式","Thinking 模式",[30,987,988,989,992,993,997],{},"Opus 4 支持 ",[123,990,991],{},"extended thinking"," 模式，模型在回答前会先做内部推理（类似 ",[861,994,996],{"href":995},"\u002Fwiki\u002Fprompt-engineering.html","思维链","），思考过程对开发者可见。开启方式：",[132,999,1001],{"className":134,"code":1000,"language":136,"meta":137,"style":137},"resp = client.messages.create(\n    model=\"claude-opus-4-20250522\",\n    max_tokens=16000,\n    thinking={\"type\": \"enabled\", \"budget_tokens\": 10000},\n    messages=[{\"role\": \"user\", \"content\": \"Solve this complex problem...\"}],\n)\n# resp.content 里会包含 thinking block + text block\nfor block in resp.content:\n    if block.type == \"thinking\":\n        print(\"[think]\", block.thinking)\n    elif block.type == \"text\":\n        print(\"[answer]\", block.text)\n",[139,1002,1003,1013,1024,1035,1065,1090,1094,1099,1112,1129,1143,1157],{"__ignoreMap":137},[142,1004,1005,1008,1010],{"class":144,"line":145},[142,1006,1007],{"class":152},"resp ",[142,1009,168],{"class":148},[142,1011,1012],{"class":152}," client.messages.create(\n",[142,1014,1015,1017,1019,1022],{"class":144,"line":162},[142,1016,201],{"class":200},[142,1018,168],{"class":148},[142,1020,1021],{"class":206},"\"claude-opus-4-20250522\"",[142,1023,210],{"class":152},[142,1025,1026,1028,1030,1033],{"class":144,"line":174},[142,1027,215],{"class":200},[142,1029,168],{"class":148},[142,1031,1032],{"class":220},"16000",[142,1034,210],{"class":152},[142,1036,1037,1040,1042,1044,1047,1049,1052,1054,1057,1059,1062],{"class":144,"line":181},[142,1038,1039],{"class":200},"    thinking",[142,1041,168],{"class":148},[142,1043,363],{"class":152},[142,1045,1046],{"class":206},"\"type\"",[142,1048,257],{"class":152},[142,1050,1051],{"class":206},"\"enabled\"",[142,1053,263],{"class":152},[142,1055,1056],{"class":206},"\"budget_tokens\"",[142,1058,257],{"class":152},[142,1060,1061],{"class":220},"10000",[142,1063,1064],{"class":152},"},\n",[142,1066,1067,1069,1071,1073,1075,1077,1079,1081,1083,1085,1088],{"class":144,"line":188},[142,1068,246],{"class":200},[142,1070,168],{"class":148},[142,1072,251],{"class":152},[142,1074,254],{"class":206},[142,1076,257],{"class":152},[142,1078,260],{"class":206},[142,1080,263],{"class":152},[142,1082,266],{"class":206},[142,1084,257],{"class":152},[142,1086,1087],{"class":206},"\"Solve this complex problem...\"",[142,1089,442],{"class":152},[142,1091,1092],{"class":144,"line":197},[142,1093,480],{"class":152},[142,1095,1096],{"class":144,"line":5},[142,1097,1098],{"class":184},"# resp.content 里会包含 thinking block + text block\n",[142,1100,1101,1104,1107,1109],{"class":144,"line":230},[142,1102,1103],{"class":148},"for",[142,1105,1106],{"class":152}," block ",[142,1108,293],{"class":148},[142,1110,1111],{"class":152}," resp.content:\n",[142,1113,1114,1117,1120,1123,1126],{"class":144,"line":243},[142,1115,1116],{"class":148},"    if",[142,1118,1119],{"class":152}," block.type ",[142,1121,1122],{"class":148},"==",[142,1124,1125],{"class":206}," \"thinking\"",[142,1127,1128],{"class":152},":\n",[142,1130,1131,1134,1137,1140],{"class":144,"line":272},[142,1132,1133],{"class":220},"        print",[142,1135,1136],{"class":152},"(",[142,1138,1139],{"class":206},"\"[think]\"",[142,1141,1142],{"class":152},", block.thinking)\n",[142,1144,1145,1148,1150,1152,1155],{"class":144,"line":284},[142,1146,1147],{"class":148},"    elif",[142,1149,1119],{"class":152},[142,1151,1122],{"class":148},[142,1153,1154],{"class":206}," \"text\"",[142,1156,1128],{"class":152},[142,1158,1159,1161,1163,1166],{"class":144,"line":299},[142,1160,1133],{"class":220},[142,1162,1136],{"class":152},[142,1164,1165],{"class":206},"\"[answer]\"",[142,1167,1168],{"class":152},", block.text)\n",[30,1170,1171],{},"Thinking budget 越大、推理越深、用 token 越多。复杂数学\u002F法律问题给 5000-10000 通常足够。",[37,1173,1174],{"id":1174},"长文写作",[30,1176,1177],{},"Claude Opus 4 的写作质量是大模型中最高的——长文结构清晰、逻辑连贯、用词精准。适合：",[568,1179,1180,1183,1186,1189],{},[571,1181,1182],{},"学术论文润色",[571,1184,1185],{},"长篇小说创作",[571,1187,1188],{},"深度行业报告",[571,1190,1191],{},"高质量技术文档",[37,1193,1194],{"id":1194},"安全性",[30,1196,1197],{},"遵循 Constitutional AI 原则，处理敏感内容时比其他模型更谨慎。适合对安全性要求高的场景（医疗、法律、金融）。",[26,1199,130],{"id":129},[132,1201,1203],{"className":134,"code":1202,"language":136,"meta":137,"style":137},"from anthropic import Anthropic\n\nclient = Anthropic()\n\n# 基础调用\nresp = client.messages.create(\n    model=\"claude-opus-4-20250522\",\n    max_tokens=8000,\n    temperature=1.0,         # 推理模型保持默认 1.0\n    messages=[\n        {\"role\": \"user\", \"content\": \"分析这份合同的法律风险点：\\n...\"}\n    ],\n)\nprint(resp.content[0].text)\n",[139,1204,1205,1215,1219,1227,1231,1236,1244,1254,1265,1280,1288,1317,1322,1326],{"__ignoreMap":137},[142,1206,1207,1209,1211,1213],{"class":144,"line":145},[142,1208,149],{"class":148},[142,1210,153],{"class":152},[142,1212,156],{"class":148},[142,1214,159],{"class":152},[142,1216,1217],{"class":144,"line":162},[142,1218,178],{"emptyLinePlaceholder":177},[142,1220,1221,1223,1225],{"class":144,"line":174},[142,1222,165],{"class":152},[142,1224,168],{"class":148},[142,1226,171],{"class":152},[142,1228,1229],{"class":144,"line":181},[142,1230,178],{"emptyLinePlaceholder":177},[142,1232,1233],{"class":144,"line":188},[142,1234,1235],{"class":184},"# 基础调用\n",[142,1237,1238,1240,1242],{"class":144,"line":197},[142,1239,1007],{"class":152},[142,1241,168],{"class":148},[142,1243,1012],{"class":152},[142,1245,1246,1248,1250,1252],{"class":144,"line":5},[142,1247,201],{"class":200},[142,1249,168],{"class":148},[142,1251,1021],{"class":206},[142,1253,210],{"class":152},[142,1255,1256,1258,1260,1263],{"class":144,"line":230},[142,1257,215],{"class":200},[142,1259,168],{"class":148},[142,1261,1262],{"class":220},"8000",[142,1264,210],{"class":152},[142,1266,1267,1269,1271,1274,1277],{"class":144,"line":243},[142,1268,233],{"class":200},[142,1270,168],{"class":148},[142,1272,1273],{"class":220},"1.0",[142,1275,1276],{"class":152},",         ",[142,1278,1279],{"class":184},"# 推理模型保持默认 1.0\n",[142,1281,1282,1284,1286],{"class":144,"line":272},[142,1283,246],{"class":200},[142,1285,168],{"class":148},[142,1287,342],{"class":152},[142,1289,1290,1293,1295,1297,1299,1301,1303,1305,1308,1311,1314],{"class":144,"line":284},[142,1291,1292],{"class":152},"        {",[142,1294,254],{"class":206},[142,1296,257],{"class":152},[142,1298,260],{"class":206},[142,1300,263],{"class":152},[142,1302,266],{"class":206},[142,1304,257],{"class":152},[142,1306,1307],{"class":206},"\"分析这份合同的法律风险点：",[142,1309,1310],{"class":220},"\\n",[142,1312,1313],{"class":206},"...\"",[142,1315,1316],{"class":152},"}\n",[142,1318,1319],{"class":144,"line":299},[142,1320,1321],{"class":152},"    ],\n",[142,1323,1324],{"class":144,"line":471},[142,1325,480],{"class":152},[142,1327,1328,1331,1334,1336],{"class":144,"line":477},[142,1329,1330],{"class":220},"print",[142,1332,1333],{"class":152},"(resp.content[",[142,1335,238],{"class":220},[142,1337,1338],{"class":152},"].text)\n",[30,1340,1341,1342,1345,1346,126],{},"注意：thinking 模式开启时，",[139,1343,1344],{},"temperature"," 必须保持默认值，不能改。Anthropic 强制约束——和 OpenAI o-series 一样的设计哲学，详见 ",[861,1347,1349],{"href":1348},"\u002Fwiki\u002Ftemperature-top-p.html#%E6%8E%A8%E7%90%86%E6%A8%A1%E5%9E%8B%E4%B8%BA%E4%BB%80%E4%B9%88%E4%B8%8D%E5%BB%BA%E8%AE%AE%E6%94%B9-temperature","Temperature 与 Top-P",[26,1351,492],{"id":492},[47,1353,1354,1366],{},[50,1355,1356],{},[53,1357,1358,1360,1363],{},[56,1359,501],{},[56,1361,1362],{},"价格",[56,1364,1365],{},"对比 Sonnet 4",[66,1367,1368,1378,1387,1397],{},[53,1369,1370,1372,1375],{},[71,1371,515],{},[71,1373,1374],{},"$15 \u002F 百万 token",[71,1376,1377],{},"5×",[53,1379,1380,1382,1385],{},[71,1381,529],{},[71,1383,1384],{},"$75 \u002F 百万 token",[71,1386,1377],{},[53,1388,1389,1392,1395],{},[71,1390,1391],{},"Cache Write",[71,1393,1394],{},"$18.75 \u002F 百万 token",[71,1396,1377],{},[53,1398,1399,1401,1404],{},[71,1400,542],{},[71,1402,1403],{},"$1.50 \u002F 百万 token",[71,1405,1377],{},[30,1407,1408],{},"启用 thinking 模式时，thinking token 也计入 Output 计费。一次 10 万字的深度文档分析约花费 $3-5；启用 thinking 后可能翻倍。",[26,1410,1411],{"id":1411},"成本控制实操",[30,1413,1414],{},"Opus 4 价格高，三招控开支：",[1416,1417,1418,1424,1430],"ol",{},[571,1419,1420,1423],{},[123,1421,1422],{},"路由策略","：用 Sonnet 4 \u002F Haiku 4 处理简单任务，只把复杂推理路由到 Opus 4。可以用一个小模型做 \"task classifier\" 决定 routing。",[571,1425,1426,1429],{},[123,1427,1428],{},"Thinking budget 控制","：不要默认拉满。简单题 1000 token 够、复杂题 5000、极复杂 10000+。",[571,1431,1432,1435,1436,1438],{},[123,1433,1434],{},"Prompt Cache 必开","：长 system prompt（法律 \u002F 合规规则、金融模型上下文）放 cache，Read 价格 -90%（详见 ",[861,1437,876],{"href":875},"）。",[26,1440,1442],{"id":1441},"opus-4-vs-sonnet-4","Opus 4 vs Sonnet 4",[47,1444,1445,1456],{},[50,1446,1447],{},[53,1448,1449,1452,1454],{},[56,1450,1451],{},"维度",[56,1453,746],{},[56,1455,64],{},[66,1457,1458,1467,1476,1485,1495,1506,1516],{},[53,1459,1460,1463,1465],{},[71,1461,1462],{},"推理",[71,1464,644],{},[71,1466,692],{},[53,1468,1469,1472,1474],{},[71,1470,1471],{},"编程",[71,1473,692],{},[71,1475,644],{},[53,1477,1478,1481,1483],{},[71,1479,1480],{},"写作",[71,1482,644],{},[71,1484,692],{},[53,1486,1487,1489,1492],{},[71,1488,1362],{},[71,1490,1491],{},"$15\u002F$75",[71,1493,1494],{},"$3\u002F$15",[53,1496,1497,1500,1503],{},[71,1498,1499],{},"输出窗口",[71,1501,1502],{},"32K",[71,1504,1505],{},"64K",[53,1507,1508,1510,1513],{},[71,1509,629],{},[71,1511,1512],{},"较慢",[71,1514,1515],{},"快",[53,1517,1518,1520,1523],{},[71,1519,985],{},[71,1521,1522],{},"✅",[71,1524,1525],{},"❌（标准模式即可）",[30,1527,1528,1531],{},[123,1529,1530],{},"关键认知","：Opus 4 不是 \"Sonnet 4 加强版\" 而是不同任务的专用工具。日常编程 Sonnet 4 更快更好；只有遇到 Sonnet 4 推理崩盘的硬骨头，才切到 Opus 4。",[26,1533,1534],{"id":1534},"适用场景判断流程",[132,1536,1540],{"className":1537,"code":1539,"language":435},[1538],"language-text","任务来了\n  │\n  ├─ 主要是写代码？      → Sonnet 4\n  ├─ 主要是高吞吐对话？   → Haiku 4\n  ├─ 需要深度推理 \u002F 跨条文关联 \u002F 严谨写作？\n  │   └─ 是 → Opus 4（配 thinking）\n  └─ 都不是 → Sonnet 4 兜底\n",[139,1541,1539],{"__ignoreMap":137},[26,1543,1545],{"id":1544},"与-gpt-5-gemini-25-pro-推理能力对比","与 GPT-5 \u002F Gemini 2.5 Pro 推理能力对比",[47,1547,1548,1562],{},[50,1549,1550],{},[53,1551,1552,1554,1556,1559],{},[56,1553,1451],{},[56,1555,746],{},[56,1557,1558],{},"GPT-5",[56,1560,1561],{},"Gemini 2.5 Pro",[66,1563,1564,1576,1590,1602,1613],{},[53,1565,1566,1568,1570,1573],{},[71,1567,952],{},[71,1569,689],{},[71,1571,1572],{},"62.5%",[71,1574,1575],{},"56.4%",[53,1577,1578,1581,1584,1587],{},[71,1579,1580],{},"MATH-500",[71,1582,1583],{},"~96%",[71,1585,1586],{},"98.4%",[71,1588,1589],{},"~95%",[53,1591,1592,1595,1597,1599],{},[71,1593,1594],{},"写作质量",[71,1596,644],{},[71,1598,692],{},[71,1600,1601],{},"★★★☆☆",[53,1603,1604,1606,1608,1611],{},[71,1605,1362],{},[71,1607,1491],{},[71,1609,1610],{},"$1.25\u002F$10",[71,1612,1610],{},[53,1614,1615,1617,1620,1623],{},[71,1616,1194],{},[71,1618,1619],{},"最严格",[71,1621,1622],{},"中等",[71,1624,1622],{},[30,1626,1627,1628,1631],{},"价格上 Opus 完全无优势——选它的理由是 ",[123,1629,1630],{},"写作质量 + 安全性 + Constitutional AI","，纯推理 \u002F 纯编程不一定值。",[26,1633,825],{"id":825},[568,1635,1636,1642,1648,1654],{},[571,1637,1638,1641],{},[123,1639,1640],{},"不要默认上 Opus 4","：90% 任务 Sonnet 4 就够，Opus 浪费钱。",[571,1643,1644,1647],{},[123,1645,1646],{},"Thinking budget 不要拉满","：从 2000 开始测，按需调高。",[571,1649,1650,1653],{},[123,1651,1652],{},"温度别动","：thinking 模式下不允许，标准模式也建议保留默认 1.0。",[571,1655,1656,1659],{},[123,1657,1658],{},"输出窗口 32K 上限","：长文写作切大块分次生成，不要试图一次 50K 输出。",[26,1661,854],{"id":854},[568,1663,1664,1672,1678],{},[571,1665,1666,1667,1671],{},"何时切换：",[861,1668,1670],{"href":1669},"\u002Fwiki\u002Fhallucination.html","Hallucination","（Opus 4 拒答率高，是优点也是缺点）",[571,1673,1674,1675],{},"推理与采样：",[861,1676,1349],{"href":1677},"\u002Fwiki\u002Ftemperature-top-p.html",[571,1679,1680,1681],{},"写作前置：",[861,1682,1683],{"href":995},"Prompt Engineering",[885,1685,1686],{},"html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":137,"searchDepth":174,"depth":174,"links":1688},[1689,1690,1696,1697,1698,1699,1700,1701,1702,1703],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":1691},[1692,1693,1694,1695],{"id":964,"depth":174,"text":964},{"id":984,"depth":174,"text":985},{"id":1174,"depth":174,"text":1174},{"id":1194,"depth":174,"text":1194},{"id":129,"depth":162,"text":130},{"id":492,"depth":162,"text":492},{"id":1411,"depth":162,"text":1411},{"id":1441,"depth":162,"text":1442},{"id":1534,"depth":162,"text":1534},{"id":1544,"depth":162,"text":1545},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"reasoning","Anthropic Claude Opus 4 旗舰推理模型，复杂代码任务、长文写作、多步规划与 Agent 链路能力业界领先，Output $75\u002FM 为 Sonnet 4 的 5 倍，适合高价值难题、深度研究与企业级关键任务。",32000,{},"\u002Fmodels\u002Fclaude-opus-4","Input $15\u002FM · Output $75\u002FM",[918,917],{"title":869,"description":1705},"claude-opus-4","models\u002Fclaude-opus-4",[1715,1716,1717,1718,1719],"推理能力 Anthropic 系列最强","复杂写作质量极高（长文\u002F小说\u002F学术）","200K 上下文，长文档分析深入","安全性好，遵循 Constitutional AI 原则","工具调用稳定，适合复杂 Agent 场景",[1721,1722,1723,1724],"复杂推理任务（法律分析、金融建模）","高质量长文写作","深度文档分析","安全敏感场景",[1726,1727,1728,939],"价格极高（Sonnet 4 的 5 倍）","编程能力与 Sonnet 4 持平甚至略低","32K 输出窗口不如 Sonnet 4（64K）","kHG9iOly-noNYUUe6-flvkUEjzI3XwOEFFivmFtMfMU",{"id":1731,"title":864,"apiCompatible":1732,"benchmarks":1733,"body":1742,"category":2720,"contextWindow":908,"description":2721,"extension":910,"maxOutput":2722,"meta":2723,"navigation":177,"path":2724,"pricing":2725,"published":915,"relatedTools":2726,"releaseDate":919,"seo":2729,"slug":2620,"stem":2730,"strengths":2731,"updated":915,"useCases":2737,"vendor":934,"vendorEn":934,"weaknesses":2742,"__hash__":2747},"models\u002Fmodels\u002Fclaude-sonnet-4.md",[11],[1734,1736,1738,1740],{"name":14,"score":1735},"72.7%",{"name":17,"score":1737},"93.7%",{"name":20,"score":1739},"88.7%",{"name":952,"score":1741},"59.4%",{"type":23,"value":1743,"toc":2698},[1744,1746,1749,1751,1753,1756,1759,1762,1766,1779,1793,1797,1800,1802,1806,1965,1969,1976,2108,2116,2119,2220,2223,2299,2308,2310,2351,2354,2357,2360,2410,2417,2420,2423,2453,2456,2459,2485,2488,2608,2610,2667,2669,2695],[26,1745,28],{"id":28},[30,1747,1748],{},"Claude Sonnet 4 是 Anthropic 于 2025 年 5 月发布的旗舰模型，定位为编程与推理的全能型选手。在 SWE-bench Verified 上拿到 72.7% 的成绩，成为 Cursor、Claude Code、Cline 等主流 AI 编程工具的首选模型。",[26,1750,35],{"id":35},[37,1752,1471],{"id":1471},[30,1754,1755],{},"SWE-bench Verified 72.7% — 这意味着它能独立解决近 3\u002F4 的真实 GitHub issue。在 Cursor Composer 多文件改写场景下，Sonnet 4 的成功率明显高于 GPT-4o 和 Gemini 2.5 Pro。",[37,1757,1758],{"id":1758},"长上下文",[30,1760,1761],{},"200K token 上下文窗口足以容纳一个中型项目的全部源码。64K 的输出窗口让它能一次性生成完整的长文件，不会在中间截断。",[37,1763,1765],{"id":1764},"agent-工具调用","Agent 工具调用",[30,1767,1768,1769,1773,1774,1778],{},"Claude Sonnet 4 的",[861,1770,1772],{"href":1771},"\u002Fwiki\u002Ffunction-calling.html","工具调用","（tool use）非常稳定。在多步 ",[861,1775,1777],{"href":1776},"\u002Fwiki\u002Fai-agent.html","Agent"," 工作流中，它能可靠地：",[568,1780,1781,1784,1787,1790],{},[571,1782,1783],{},"解析工具返回结果",[571,1785,1786],{},"决定下一步调用哪个工具",[571,1788,1789],{},"在工具失败时自动重试或换方案",[571,1791,1792],{},"支持 parallel tool calls，一次返回多个独立调用",[37,1794,1796],{"id":1795},"artifacts","Artifacts",[30,1798,1799],{},"原生支持 artifacts 功能——生成的 HTML\u002FReact\u002FSVG 等前端代码可以直接在对话中渲染预览，不需要切换到外部工具。",[26,1801,130],{"id":129},[37,1803,1805],{"id":1804},"python-sdk","Python SDK",[132,1807,1809],{"className":134,"code":1808,"language":136,"meta":137,"style":137},"from anthropic import Anthropic\n\nclient = Anthropic(api_key=\"sk-ant-...\")\n\nresp = client.messages.create(\n    model=\"claude-sonnet-4-20250522\",\n    max_tokens=4096,\n    temperature=0,           # 编程场景建议 0，详见 wiki\u002Ftemperature-top-p\n    system=\"You are an expert Python reviewer.\",\n    messages=[\n        {\"role\": \"user\", \"content\": \"Review this code:\\n```python\\n...\\n```\"}\n    ],\n)\nprint(resp.content[0].text)\n",[139,1810,1811,1821,1825,1844,1848,1856,1867,1878,1891,1903,1911,1947,1951,1955],{"__ignoreMap":137},[142,1812,1813,1815,1817,1819],{"class":144,"line":145},[142,1814,149],{"class":148},[142,1816,153],{"class":152},[142,1818,156],{"class":148},[142,1820,159],{"class":152},[142,1822,1823],{"class":144,"line":162},[142,1824,178],{"emptyLinePlaceholder":177},[142,1826,1827,1829,1831,1834,1837,1839,1842],{"class":144,"line":174},[142,1828,165],{"class":152},[142,1830,168],{"class":148},[142,1832,1833],{"class":152}," Anthropic(",[142,1835,1836],{"class":200},"api_key",[142,1838,168],{"class":148},[142,1840,1841],{"class":206},"\"sk-ant-...\"",[142,1843,480],{"class":152},[142,1845,1846],{"class":144,"line":181},[142,1847,178],{"emptyLinePlaceholder":177},[142,1849,1850,1852,1854],{"class":144,"line":188},[142,1851,1007],{"class":152},[142,1853,168],{"class":148},[142,1855,1012],{"class":152},[142,1857,1858,1860,1862,1865],{"class":144,"line":197},[142,1859,201],{"class":200},[142,1861,168],{"class":148},[142,1863,1864],{"class":206},"\"claude-sonnet-4-20250522\"",[142,1866,210],{"class":152},[142,1868,1869,1871,1873,1876],{"class":144,"line":5},[142,1870,215],{"class":200},[142,1872,168],{"class":148},[142,1874,1875],{"class":220},"4096",[142,1877,210],{"class":152},[142,1879,1880,1882,1884,1886,1888],{"class":144,"line":230},[142,1881,233],{"class":200},[142,1883,168],{"class":148},[142,1885,238],{"class":220},[142,1887,224],{"class":152},[142,1889,1890],{"class":184},"# 编程场景建议 0，详见 wiki\u002Ftemperature-top-p\n",[142,1892,1893,1896,1898,1901],{"class":144,"line":243},[142,1894,1895],{"class":200},"    system",[142,1897,168],{"class":148},[142,1899,1900],{"class":206},"\"You are an expert Python reviewer.\"",[142,1902,210],{"class":152},[142,1904,1905,1907,1909],{"class":144,"line":272},[142,1906,246],{"class":200},[142,1908,168],{"class":148},[142,1910,342],{"class":152},[142,1912,1913,1915,1917,1919,1921,1923,1925,1927,1930,1932,1935,1937,1940,1942,1945],{"class":144,"line":284},[142,1914,1292],{"class":152},[142,1916,254],{"class":206},[142,1918,257],{"class":152},[142,1920,260],{"class":206},[142,1922,263],{"class":152},[142,1924,266],{"class":206},[142,1926,257],{"class":152},[142,1928,1929],{"class":206},"\"Review this code:",[142,1931,1310],{"class":220},[142,1933,1934],{"class":206},"```python",[142,1936,1310],{"class":220},[142,1938,1939],{"class":206},"...",[142,1941,1310],{"class":220},[142,1943,1944],{"class":206},"```\"",[142,1946,1316],{"class":152},[142,1948,1949],{"class":144,"line":299},[142,1950,1321],{"class":152},[142,1952,1953],{"class":144,"line":471},[142,1954,480],{"class":152},[142,1956,1957,1959,1961,1963],{"class":144,"line":477},[142,1958,1330],{"class":220},[142,1960,1333],{"class":152},[142,1962,238],{"class":220},[142,1964,1338],{"class":152},[37,1966,1968],{"id":1967},"启用-prompt-cache省钱关键","启用 Prompt Cache（省钱关键）",[30,1970,1971,1972,1975],{},"长 system prompt 或工具定义放 ",[139,1973,1974],{},"cache_control"," 块里，5 分钟内复用 Input 价格 -90%：",[132,1977,1979],{"className":134,"code":1978,"language":136,"meta":137,"style":137},"resp = client.messages.create(\n    model=\"claude-sonnet-4-20250522\",\n    max_tokens=4096,\n    system=[\n        {\n            \"type\": \"text\",\n            \"text\": \"\u003Chuge_codebase_context>...\u003C\u002Fhuge_codebase_context>\",\n            \"cache_control\": {\"type\": \"ephemeral\"},  # ← 关键\n        }\n    ],\n    messages=[{\"role\": \"user\", \"content\": \"Find bugs in module X.\"}],\n)\n# 看 resp.usage.cache_read_input_tokens 确认命中\n",[139,1980,1981,1989,1999,2009,2017,2021,2033,2045,2066,2070,2074,2099,2103],{"__ignoreMap":137},[142,1982,1983,1985,1987],{"class":144,"line":145},[142,1984,1007],{"class":152},[142,1986,168],{"class":148},[142,1988,1012],{"class":152},[142,1990,1991,1993,1995,1997],{"class":144,"line":162},[142,1992,201],{"class":200},[142,1994,168],{"class":148},[142,1996,1864],{"class":206},[142,1998,210],{"class":152},[142,2000,2001,2003,2005,2007],{"class":144,"line":174},[142,2002,215],{"class":200},[142,2004,168],{"class":148},[142,2006,1875],{"class":220},[142,2008,210],{"class":152},[142,2010,2011,2013,2015],{"class":144,"line":181},[142,2012,1895],{"class":200},[142,2014,168],{"class":148},[142,2016,342],{"class":152},[142,2018,2019],{"class":144,"line":188},[142,2020,347],{"class":152},[142,2022,2023,2026,2028,2031],{"class":144,"line":197},[142,2024,2025],{"class":206},"            \"type\"",[142,2027,257],{"class":152},[142,2029,2030],{"class":206},"\"text\"",[142,2032,210],{"class":152},[142,2034,2035,2038,2040,2043],{"class":144,"line":5},[142,2036,2037],{"class":206},"            \"text\"",[142,2039,257],{"class":152},[142,2041,2042],{"class":206},"\"\u003Chuge_codebase_context>...\u003C\u002Fhuge_codebase_context>\"",[142,2044,210],{"class":152},[142,2046,2047,2050,2053,2055,2057,2060,2063],{"class":144,"line":230},[142,2048,2049],{"class":206},"            \"cache_control\"",[142,2051,2052],{"class":152},": {",[142,2054,1046],{"class":206},[142,2056,257],{"class":152},[142,2058,2059],{"class":206},"\"ephemeral\"",[142,2061,2062],{"class":152},"},  ",[142,2064,2065],{"class":184},"# ← 关键\n",[142,2067,2068],{"class":144,"line":243},[142,2069,452],{"class":152},[142,2071,2072],{"class":144,"line":272},[142,2073,1321],{"class":152},[142,2075,2076,2078,2080,2082,2084,2086,2088,2090,2092,2094,2097],{"class":144,"line":284},[142,2077,246],{"class":200},[142,2079,168],{"class":148},[142,2081,251],{"class":152},[142,2083,254],{"class":206},[142,2085,257],{"class":152},[142,2087,260],{"class":206},[142,2089,263],{"class":152},[142,2091,266],{"class":206},[142,2093,257],{"class":152},[142,2095,2096],{"class":206},"\"Find bugs in module X.\"",[142,2098,442],{"class":152},[142,2100,2101],{"class":144,"line":299},[142,2102,480],{"class":152},[142,2104,2105],{"class":144,"line":471},[142,2106,2107],{"class":184},"# 看 resp.usage.cache_read_input_tokens 确认命中\n",[30,2109,2110,2111,2113,2114,126],{},"实测：50K token 的固定 system prompt 配合 cache，每次调用 Input 成本从 $0.15 降到 $0.015，重度使用月省数百刀。详见 ",[861,2112,876],{"href":875}," 与 ",[861,2115,883],{"href":882},[37,2117,2118],{"id":2118},"流式输出",[132,2120,2122],{"className":134,"code":2121,"language":136,"meta":137,"style":137},"with client.messages.stream(\n    model=\"claude-sonnet-4-20250522\",\n    max_tokens=4096,\n    messages=[{\"role\": \"user\", \"content\": \"Write a long article.\"}],\n) as stream:\n    for text in stream.text_stream:\n        print(text, end=\"\", flush=True)\n",[139,2123,2124,2130,2140,2150,2175,2183,2193],{"__ignoreMap":137},[142,2125,2126,2128],{"class":144,"line":145},[142,2127,191],{"class":148},[142,2129,194],{"class":152},[142,2131,2132,2134,2136,2138],{"class":144,"line":162},[142,2133,201],{"class":200},[142,2135,168],{"class":148},[142,2137,1864],{"class":206},[142,2139,210],{"class":152},[142,2141,2142,2144,2146,2148],{"class":144,"line":174},[142,2143,215],{"class":200},[142,2145,168],{"class":148},[142,2147,1875],{"class":220},[142,2149,210],{"class":152},[142,2151,2152,2154,2156,2158,2160,2162,2164,2166,2168,2170,2173],{"class":144,"line":181},[142,2153,246],{"class":200},[142,2155,168],{"class":148},[142,2157,251],{"class":152},[142,2159,254],{"class":206},[142,2161,257],{"class":152},[142,2163,260],{"class":206},[142,2165,263],{"class":152},[142,2167,266],{"class":206},[142,2169,257],{"class":152},[142,2171,2172],{"class":206},"\"Write a long article.\"",[142,2174,442],{"class":152},[142,2176,2177,2179,2181],{"class":144,"line":188},[142,2178,275],{"class":152},[142,2180,278],{"class":148},[142,2182,281],{"class":152},[142,2184,2185,2187,2189,2191],{"class":144,"line":197},[142,2186,287],{"class":148},[142,2188,290],{"class":152},[142,2190,293],{"class":148},[142,2192,296],{"class":152},[142,2194,2195,2197,2200,2203,2205,2208,2210,2213,2215,2218],{"class":144,"line":5},[142,2196,1133],{"class":220},[142,2198,2199],{"class":152},"(text, ",[142,2201,2202],{"class":200},"end",[142,2204,168],{"class":148},[142,2206,2207],{"class":206},"\"\"",[142,2209,263],{"class":152},[142,2211,2212],{"class":200},"flush",[142,2214,168],{"class":148},[142,2216,2217],{"class":220},"True",[142,2219,480],{"class":152},[26,2221,2222],{"id":2222},"关键参数",[47,2224,2225,2237],{},[50,2226,2227],{},[53,2228,2229,2232,2235],{},[56,2230,2231],{},"参数",[56,2233,2234],{},"推荐值",[56,2236,58],{},[66,2238,2239,2250,2262,2273,2286],{},[53,2240,2241,2245,2247],{},[71,2242,2243],{},[139,2244,1344],{},[71,2246,238],{},[71,2248,2249],{},"代码生成 \u002F 工具调用 \u002F 数据抽取",[53,2251,2252,2256,2259],{},[71,2253,2254],{},[139,2255,1344],{},[71,2257,2258],{},"0.7",[71,2260,2261],{},"通用对话",[53,2263,2264,2268,2270],{},[71,2265,2266],{},[139,2267,1344],{},[71,2269,1273],{},[71,2271,2272],{},"创意写作",[53,2274,2275,2280,2283],{},[71,2276,2277],{},[139,2278,2279],{},"max_tokens",[71,2281,2282],{},"显式设置",[71,2284,2285],{},"不要默认上 64K，会浪费钱",[53,2287,2288,2293,2296],{},[71,2289,2290],{},[139,2291,2292],{},"stop_sequences",[71,2294,2295],{},"按需设",[71,2297,2298],{},"结构化输出场景提前截断",[30,2300,2301,2302,2305,2306,126],{},"强烈建议工具调用场景设 ",[139,2303,2304],{},"temperature=0","，能让函数选择和参数提取稳定 10x。原理见 ",[861,2307,1349],{"href":1677},[26,2309,492],{"id":492},[47,2311,2312,2320],{},[50,2313,2314],{},[53,2315,2316,2318],{},[56,2317,501],{},[56,2319,1362],{},[66,2321,2322,2329,2335,2343],{},[53,2323,2324,2326],{},[71,2325,515],{},[71,2327,2328],{},"$3 \u002F 百万 token",[53,2330,2331,2333],{},[71,2332,529],{},[71,2334,1374],{},[53,2336,2337,2340],{},[71,2338,2339],{},"Prompt Cache Write",[71,2341,2342],{},"$3.75 \u002F 百万 token",[53,2344,2345,2348],{},[71,2346,2347],{},"Prompt Cache Read",[71,2349,2350],{},"$0.30 \u002F 百万 token",[30,2352,2353],{},"对比 GPT-4o（Input $2.5\u002FM · Output $10\u002FM），Sonnet 4 贵约 20-50%，但编程质量更高。重度编程用户差的这点钱完全值得。",[26,2355,2356],{"id":2356},"限流与并发",[30,2358,2359],{},"Anthropic 官方 API 的 rate limit 按 tier 分档（Tier 1 → Tier 4），关键三个指标：",[47,2361,2362,2375],{},[50,2363,2364],{},[53,2365,2366,2369,2372],{},[56,2367,2368],{},"指标",[56,2370,2371],{},"Tier 1（首付费用户）",[56,2373,2374],{},"Tier 4（高消费用户）",[66,2376,2377,2388,2399],{},[53,2378,2379,2382,2385],{},[71,2380,2381],{},"RPM（请求\u002F分钟）",[71,2383,2384],{},"50",[71,2386,2387],{},"4,000",[53,2389,2390,2393,2396],{},[71,2391,2392],{},"ITPM（input token\u002F分钟）",[71,2394,2395],{},"50K",[71,2397,2398],{},"2M",[53,2400,2401,2404,2407],{},[71,2402,2403],{},"OTPM（output token\u002F分钟）",[71,2405,2406],{},"10K",[71,2408,2409],{},"400K",[30,2411,2412,2413,2416],{},"踩坑：长 prompt + 高并发场景，最先打到的不是 RPM 而是 ITPM——50K input 一发就触顶。",[123,2414,2415],{},"生产环境建议先升 Tier、再做限流（exponential backoff 重试 429）、再开 prompt cache","，三件套组合才能稳定撑住流量。",[26,2418,2419],{"id":2419},"在国内怎么用",[30,2421,2422],{},"官方 API 不对中国大陆开放，但可以通过以下方式使用：",[1416,2424,2425,2431,2441,2447],{},[571,2426,2427,2430],{},[123,2428,2429],{},"AWS Bedrock"," — 企业级方案，需要 AWS 海外账号",[571,2432,2433,2436,2437,2440],{},[123,2434,2435],{},"API 中转"," — 国内有多家提供 Claude API 中转服务（见 ",[139,2438,2439],{},"\u002Fcoding\u002Fapi\u002F"," 分类）",[571,2442,2443,2446],{},[123,2444,2445],{},"Cursor \u002F Claude Code"," — 这些工具内置了 Claude 模型，直接订阅即可",[571,2448,2449,2452],{},[123,2450,2451],{},"Coze \u002F 元器"," — 国内 Agent 平台部分已接入",[30,2454,2455],{},"注意中转服务的稳定性 \u002F 合规性差异较大，生产场景优先选 Bedrock 或自己用 Cloudflare Workers 做转发。",[26,2457,2458],{"id":2458},"适用场景",[568,2460,2461,2467,2473,2479],{},[571,2462,2463,2466],{},[123,2464,2465],{},"主力编程模型","：Cursor \u002F Claude Code \u002F Cline 的默认选择",[571,2468,2469,2472],{},[123,2470,2471],{},"代码审查","：批量 PR review，比 GPT-4o 更精准",[571,2474,2475,2478],{},[123,2476,2477],{},"长文档处理","：200K 上下文处理合同、论文、技术文档",[571,2480,2481,2484],{},[123,2482,2483],{},"Agent 编排","：多步工具调用稳定，适合 Coze \u002F Dify 工作流",[26,2486,2487],{"id":2487},"与同档模型怎么选",[47,2489,2490,2505],{},[50,2491,2492],{},[53,2493,2494,2496,2498,2500,2502],{},[56,2495,1451],{},[56,2497,64],{},[56,2499,1558],{},[56,2501,1561],{},[56,2503,2504],{},"GLM-5.2",[66,2506,2507,2522,2538,2551,2564,2580,2594],{},[53,2508,2509,2511,2513,2516,2519],{},[71,2510,626],{},[71,2512,1735],{},[71,2514,2515],{},"68.0%",[71,2517,2518],{},"63.8%",[71,2520,2521],{},"65.3%",[53,2523,2524,2527,2530,2532,2535],{},[71,2525,2526],{},"上下文",[71,2528,2529],{},"200K",[71,2531,2409],{},[71,2533,2534],{},"1M",[71,2536,2537],{},"128K",[53,2539,2540,2542,2544,2546,2549],{},[71,2541,1499],{},[71,2543,1505],{},[71,2545,2537],{},[71,2547,2548],{},"65K",[71,2550,1502],{},[53,2552,2553,2556,2558,2560,2562],{},[71,2554,2555],{},"工具调用稳定性",[71,2557,644],{},[71,2559,692],{},[71,2561,692],{},[71,2563,1601],{},[53,2565,2566,2569,2572,2575,2578],{},[71,2567,2568],{},"多模态",[71,2570,2571],{},"图片",[71,2573,2574],{},"图片+音频+视频",[71,2576,2577],{},"全类型",[71,2579,2571],{},[53,2581,2582,2585,2587,2590,2592],{},[71,2583,2584],{},"Input 价格",[71,2586,521],{},[71,2588,2589],{},"$1.25\u002FM",[71,2591,2589],{},[71,2593,686],{},[53,2595,2596,2599,2602,2604,2606],{},[71,2597,2598],{},"国内直连",[71,2600,2601],{},"❌",[71,2603,2601],{},[71,2605,2601],{},[71,2607,1522],{},[26,2609,825],{"id":825},[568,2611,2612,2626,2640,2652,2658],{},[571,2613,2614,2617,2618,2621,2622,2625],{},[123,2615,2616],{},"别忘记带版本号","：",[139,2619,2620],{},"claude-sonnet-4"," 是 alias，可能指向新版导致行为变化；生产用 ",[139,2623,2624],{},"claude-sonnet-4-20250522"," 这种带日期的稳定 ID。",[571,2627,2628,2631,2632,2635,2636,2639],{},[123,2629,2630],{},"系统消息位置","：Anthropic 把 ",[139,2633,2634],{},"system"," 单独作为参数传，不要混进 ",[139,2637,2638],{},"messages"," 数组里——很多从 OpenAI 迁移过来的代码会踩这个坑。",[571,2641,2642,2645,2646,2649,2650,126],{},[123,2643,2644],{},"prompt cache 失效","：cache 段前面只要变一个 token，整段缓存就废了。",[123,2647,2648],{},"动态内容必须放最后","，详见 ",[861,2651,883],{"href":882},[571,2653,2654,2657],{},[123,2655,2656],{},"工具定义重复发","：每轮对话都把工具定义重发会浪费 input token。配合 prompt cache 把工具定义标记为可缓存。",[571,2659,2660,2663,2664,126],{},[123,2661,2662],{},"超时设置","：默认 SDK 超时 600 秒，长 Agent 任务可能不够。显式设 ",[139,2665,2666],{},"client = Anthropic(timeout=1800)",[26,2668,854],{"id":854},[568,2670,2671,2677,2684,2689],{},[571,2672,2673,2674],{},"调用模式：",[861,2675,2676],{"href":1771},"Function Calling",[571,2678,2679,2680,865,2682],{},"省钱：",[861,2681,883],{"href":882},[861,2683,876],{"href":875},[571,2685,2686,2687],{},"采样：",[861,2688,1349],{"href":1677},[571,2690,2691,2692],{},"Agent 视角：",[861,2693,2694],{"href":1776},"AI Agent",[885,2696,2697],{},"html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":137,"searchDepth":174,"depth":174,"links":2699},[2700,2701,2707,2712,2713,2714,2715,2716,2717,2718,2719],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":2702},[2703,2704,2705,2706],{"id":1471,"depth":174,"text":1471},{"id":1758,"depth":174,"text":1758},{"id":1764,"depth":174,"text":1765},{"id":1795,"depth":174,"text":1796},{"id":129,"depth":162,"text":130,"children":2708},[2709,2710,2711],{"id":1804,"depth":174,"text":1805},{"id":1967,"depth":174,"text":1968},{"id":2118,"depth":174,"text":2118},{"id":2222,"depth":162,"text":2222},{"id":492,"depth":162,"text":492},{"id":2356,"depth":162,"text":2356},{"id":2419,"depth":162,"text":2419},{"id":2458,"depth":162,"text":2458},{"id":2487,"depth":162,"text":2487},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"coding","Anthropic 旗舰编程模型，200K 上下文 + 64K 输出，SWE-bench 72.7%，目前 AI 编程领域最强模型之一。",64000,{},"\u002Fmodels\u002Fclaude-sonnet-4","Input $3\u002FM · Output $15\u002FM · Prompt Cache $0.30\u002FM",[918,917,2727,2728],"coding\u002Fide\u002Ftrae","coding\u002Fcli\u002Fcline",{"title":864,"description":2721},"models\u002Fclaude-sonnet-4",[2732,2733,2734,2735,2736],"顶级编程能力，SWE-bench 72.7%，在 Cursor \u002F Claude Code 中表现最佳","artifacts 原生支持，前端代码生成即时预览","64K 输出窗口，长文件一次生成不截断","多步推理链稳定，Agent 场景下工具调用可靠","200K 上下文，大型代码库分析无压力",[2738,2739,2740,2741],"AI 编程（Cursor \u002F Claude Code \u002F Cline 主力模型）","代码审查与重构","长文档分析与总结","Agent 工作流编排",[2743,2744,2745,2746],"国内无官方 API，需走中转或 AWS Bedrock","价格高于竞品（GPT-4o Input $2.5\u002FM）","无原生图片生成能力","知识截止 2025-04，近期事件不了解","Ydh4olATGpkHBU7APj7t18_45BogDO4Q2zVoTzL7tQI",{"id":2749,"title":2750,"apiCompatible":2751,"benchmarks":2753,"body":2763,"category":1704,"contextWindow":3692,"description":3693,"extension":910,"maxOutput":3694,"meta":3695,"navigation":177,"path":3696,"pricing":3697,"published":915,"relatedTools":3698,"releaseDate":3701,"seo":3702,"slug":3703,"stem":3704,"strengths":3705,"updated":915,"useCases":3711,"vendor":3715,"vendorEn":3716,"weaknesses":3717,"__hash__":3722},"models\u002Fmodels\u002Fdeepseek-r1.md","DeepSeek-R1",[2752],"openai",[2754,2756,2759,2761],{"name":1580,"score":2755},"97.3%",{"name":2757,"score":2758},"AIME 2024","79.8%",{"name":952,"score":2760},"58.2%",{"name":17,"score":2762},"89.2%",{"type":23,"value":2764,"toc":3675},[2765,2767,2770,2773,2775,2778,2781,2852,2855,2858,2861,2953,2959,2985,2988,2995,3045,3048,3050,3248,3252,3264,3325,3328,3330,3365,3371,3374,3378,3460,3465,3476,3480,3557,3564,3567,3569,3594,3596,3610,3612,3644,3646,3672],[26,2766,28],{"id":28},[30,2768,2769],{},"DeepSeek-R1 是深度求索于 2025 年 1 月与 V3 同步发布的推理模型。与 V3 的区别在于：R1 在回答前会先\"想一想\"（思维链），在数学、逻辑、科学推理上远超 V3。",[30,2771,2772],{},"R1 的发布是开源大模型领域的一个分水岭事件——首次让\"推理模型\"以完全开源 + 思维链可见的形式进入业界，比 OpenAI o1（思维链黑盒）更开放。",[26,2774,35],{"id":35},[37,2776,2777],{"id":2777},"推理能力",[30,2779,2780],{},"R1 在 MATH-500 上拿到 97.3%，AIME 2024（美国数学竞赛）79.8%。这些成绩接近 GPT-5（98.4% \u002F 82.3%），远超非推理模型：",[47,2782,2783,2793],{},[50,2784,2785],{},[53,2786,2787,2789,2791],{},[56,2788,619],{},[56,2790,1580],{},[56,2792,2757],{},[66,2794,2795,2804,2812,2821,2831,2841],{},[53,2796,2797,2799,2801],{},[71,2798,1558],{},[71,2800,1586],{},[71,2802,2803],{},"82.3%",[53,2805,2806,2808,2810],{},[71,2807,2750],{},[71,2809,2755],{},[71,2811,2758],{},[53,2813,2814,2816,2818],{},[71,2815,869],{},[71,2817,1583],{},[71,2819,2820],{},"~74%",[53,2822,2823,2825,2828],{},[71,2824,680],{},[71,2826,2827],{},"90.2%",[71,2829,2830],{},"39.2%",[53,2832,2833,2835,2838],{},[71,2834,864],{},[71,2836,2837],{},"92%",[71,2839,2840],{},"49%",[53,2842,2843,2846,2849],{},[71,2844,2845],{},"GPT-4o",[71,2847,2848],{},"76.6%",[71,2850,2851],{},"13.4%",[30,2853,2854],{},"注意 V3 → R1 在 AIME 上从 39% 飙升到 79%——这就是\"推理模式\"带来的差距。",[37,2856,2857],{"id":2857},"思维链可见",[30,2859,2860],{},"R1 的思维链完全开放——你可以看到模型一步步推理的过程：",[132,2862,2864],{"className":134,"code":2863,"language":136,"meta":137,"style":137},"resp = client.chat.completions.create(\n    model=\"deepseek-reasoner\",   # R1 的别名\n    messages=[{\"role\": \"user\", \"content\": \"证明...\"}],\n)\n# 关键字段\nprint(resp.choices[0].message.reasoning_content)   # 思维链\nprint(resp.choices[0].message.content)             # 最终回答\n",[139,2865,2866,2875,2890,2915,2919,2924,2939],{"__ignoreMap":137},[142,2867,2868,2870,2872],{"class":144,"line":145},[142,2869,1007],{"class":152},[142,2871,168],{"class":148},[142,2873,2874],{"class":152}," client.chat.completions.create(\n",[142,2876,2877,2879,2881,2884,2887],{"class":144,"line":162},[142,2878,201],{"class":200},[142,2880,168],{"class":148},[142,2882,2883],{"class":206},"\"deepseek-reasoner\"",[142,2885,2886],{"class":152},",   ",[142,2888,2889],{"class":184},"# R1 的别名\n",[142,2891,2892,2894,2896,2898,2900,2902,2904,2906,2908,2910,2913],{"class":144,"line":174},[142,2893,246],{"class":200},[142,2895,168],{"class":148},[142,2897,251],{"class":152},[142,2899,254],{"class":206},[142,2901,257],{"class":152},[142,2903,260],{"class":206},[142,2905,263],{"class":152},[142,2907,266],{"class":206},[142,2909,257],{"class":152},[142,2911,2912],{"class":206},"\"证明...\"",[142,2914,442],{"class":152},[142,2916,2917],{"class":144,"line":181},[142,2918,480],{"class":152},[142,2920,2921],{"class":144,"line":188},[142,2922,2923],{"class":184},"# 关键字段\n",[142,2925,2926,2928,2931,2933,2936],{"class":144,"line":197},[142,2927,1330],{"class":220},[142,2929,2930],{"class":152},"(resp.choices[",[142,2932,238],{"class":220},[142,2934,2935],{"class":152},"].message.reasoning_content)   ",[142,2937,2938],{"class":184},"# 思维链\n",[142,2940,2941,2943,2945,2947,2950],{"class":144,"line":5},[142,2942,1330],{"class":220},[142,2944,2930],{"class":152},[142,2946,238],{"class":220},[142,2948,2949],{"class":152},"].message.content)             ",[142,2951,2952],{"class":184},"# 最终回答\n",[30,2954,2955,2958],{},[139,2956,2957],{},"reasoning_content"," 字段在 OpenAI 兼容接口上是 DeepSeek 的扩展。这对以下场景特别有价值：",[568,2960,2961,2967,2973,2979],{},[571,2962,2963,2966],{},[123,2964,2965],{},"教育场景"," — 学生可以看到解题思路",[571,2968,2969,2972],{},[123,2970,2971],{},"调试场景"," — 开发者可以理解模型为什么这样回答",[571,2974,2975,2978],{},[123,2976,2977],{},"信任建立"," — 可验证的推理过程",[571,2980,2981,2984],{},[123,2982,2983],{},"数据蒸馏"," — 用 R1 的思维链训练小模型（Meta 这么干过）",[37,2986,2987],{"id":2987},"开源",[30,2989,2990,2991,2994],{},"R1 模型权重完全开源（MIT 协议）。可以在自己的 GPU 上部署，不依赖 API。同时 DeepSeek 也放出了 ",[123,2992,2993],{},"R1-Distill"," 系列——把 R1 的能力蒸馏到 Llama \u002F Qwen 的小模型上：",[47,2996,2997,3010],{},[50,2998,2999],{},[53,3000,3001,3004,3007],{},[56,3002,3003],{},"蒸馏版本",[56,3005,3006],{},"显存需求",[56,3008,3009],{},"性能保留",[66,3011,3012,3023,3034],{},[53,3013,3014,3017,3020],{},[71,3015,3016],{},"R1-Distill-Qwen-1.5B",[71,3018,3019],{},"4GB",[71,3021,3022],{},"数学接近 V3",[53,3024,3025,3028,3031],{},[71,3026,3027],{},"R1-Distill-Qwen-7B",[71,3029,3030],{},"16GB",[71,3032,3033],{},"推理接近 GPT-4o",[53,3035,3036,3039,3042],{},[71,3037,3038],{},"R1-Distill-Llama-70B",[71,3040,3041],{},"140GB",[71,3043,3044],{},"接近原版 R1",[30,3046,3047],{},"1.5B 在笔记本上就能跑推理模型——这是 R1 之前完全不可能的。",[26,3049,130],{"id":129},[132,3051,3053],{"className":134,"code":3052,"language":136,"meta":137,"style":137},"from openai import OpenAI\nclient = OpenAI(\n    api_key=\"sk-...\",\n    base_url=\"https:\u002F\u002Fapi.deepseek.com\u002Fv1\",\n)\n\nresp = client.chat.completions.create(\n    model=\"deepseek-reasoner\",\n    messages=[\n        {\"role\": \"user\", \"content\": \"证明素数无穷\"}\n    ],\n    # 注意：R1 不支持 temperature \u002F top_p \u002F presence_penalty 等参数\n    # 传了会被忽略\n    max_tokens=8000,\n)\n\nmsg = resp.choices[0].message\nprint(\"【思考过程】\")\nprint(msg.reasoning_content)\nprint(\"\\n【最终答案】\")\nprint(msg.content)\n",[139,3054,3055,3067,3076,3088,3100,3104,3108,3116,3126,3134,3155,3159,3164,3169,3179,3183,3188,3204,3216,3224,3240],{"__ignoreMap":137},[142,3056,3057,3059,3062,3064],{"class":144,"line":145},[142,3058,149],{"class":148},[142,3060,3061],{"class":152}," openai ",[142,3063,156],{"class":148},[142,3065,3066],{"class":152}," OpenAI\n",[142,3068,3069,3071,3073],{"class":144,"line":162},[142,3070,165],{"class":152},[142,3072,168],{"class":148},[142,3074,3075],{"class":152}," OpenAI(\n",[142,3077,3078,3081,3083,3086],{"class":144,"line":174},[142,3079,3080],{"class":200},"    api_key",[142,3082,168],{"class":148},[142,3084,3085],{"class":206},"\"sk-...\"",[142,3087,210],{"class":152},[142,3089,3090,3093,3095,3098],{"class":144,"line":181},[142,3091,3092],{"class":200},"    base_url",[142,3094,168],{"class":148},[142,3096,3097],{"class":206},"\"https:\u002F\u002Fapi.deepseek.com\u002Fv1\"",[142,3099,210],{"class":152},[142,3101,3102],{"class":144,"line":188},[142,3103,480],{"class":152},[142,3105,3106],{"class":144,"line":197},[142,3107,178],{"emptyLinePlaceholder":177},[142,3109,3110,3112,3114],{"class":144,"line":5},[142,3111,1007],{"class":152},[142,3113,168],{"class":148},[142,3115,2874],{"class":152},[142,3117,3118,3120,3122,3124],{"class":144,"line":230},[142,3119,201],{"class":200},[142,3121,168],{"class":148},[142,3123,2883],{"class":206},[142,3125,210],{"class":152},[142,3127,3128,3130,3132],{"class":144,"line":243},[142,3129,246],{"class":200},[142,3131,168],{"class":148},[142,3133,342],{"class":152},[142,3135,3136,3138,3140,3142,3144,3146,3148,3150,3153],{"class":144,"line":272},[142,3137,1292],{"class":152},[142,3139,254],{"class":206},[142,3141,257],{"class":152},[142,3143,260],{"class":206},[142,3145,263],{"class":152},[142,3147,266],{"class":206},[142,3149,257],{"class":152},[142,3151,3152],{"class":206},"\"证明素数无穷\"",[142,3154,1316],{"class":152},[142,3156,3157],{"class":144,"line":284},[142,3158,1321],{"class":152},[142,3160,3161],{"class":144,"line":299},[142,3162,3163],{"class":184},"    # 注意：R1 不支持 temperature \u002F top_p \u002F presence_penalty 等参数\n",[142,3165,3166],{"class":144,"line":471},[142,3167,3168],{"class":184},"    # 传了会被忽略\n",[142,3170,3171,3173,3175,3177],{"class":144,"line":477},[142,3172,215],{"class":200},[142,3174,168],{"class":148},[142,3176,1262],{"class":220},[142,3178,210],{"class":152},[142,3180,3181],{"class":144,"line":483},[142,3182,480],{"class":152},[142,3184,3186],{"class":144,"line":3185},16,[142,3187,178],{"emptyLinePlaceholder":177},[142,3189,3191,3194,3196,3199,3201],{"class":144,"line":3190},17,[142,3192,3193],{"class":152},"msg ",[142,3195,168],{"class":148},[142,3197,3198],{"class":152}," resp.choices[",[142,3200,238],{"class":220},[142,3202,3203],{"class":152},"].message\n",[142,3205,3207,3209,3211,3214],{"class":144,"line":3206},18,[142,3208,1330],{"class":220},[142,3210,1136],{"class":152},[142,3212,3213],{"class":206},"\"【思考过程】\"",[142,3215,480],{"class":152},[142,3217,3219,3221],{"class":144,"line":3218},19,[142,3220,1330],{"class":220},[142,3222,3223],{"class":152},"(msg.reasoning_content)\n",[142,3225,3227,3229,3231,3233,3235,3238],{"class":144,"line":3226},20,[142,3228,1330],{"class":220},[142,3230,1136],{"class":152},[142,3232,372],{"class":206},[142,3234,1310],{"class":220},[142,3236,3237],{"class":206},"【最终答案】\"",[142,3239,480],{"class":152},[142,3241,3243,3245],{"class":144,"line":3242},21,[142,3244,1330],{"class":220},[142,3246,3247],{"class":152},"(msg.content)\n",[37,3249,3251],{"id":3250},"multi-turn-注意事项","Multi-turn 注意事项",[30,3253,3254,3257,3258,3260,3261,2617],{},[123,3255,3256],{},"重要","：multi-turn 对话时不要把 ",[139,3259,2957],{}," 加回 messages 历史，只保留 ",[139,3262,3263],{},"content",[132,3265,3267],{"className":134,"code":3266,"language":136,"meta":137,"style":137},"# ❌ 错误：把思维链塞回历史，下一轮上下文翻倍\nhistory.append({\"role\": \"assistant\", \"content\": msg.reasoning_content + msg.content})\n\n# ✅ 正确：只保留最终答案\nhistory.append({\"role\": \"assistant\", \"content\": msg.content})\n",[139,3268,3269,3274,3299,3303,3308],{"__ignoreMap":137},[142,3270,3271],{"class":144,"line":145},[142,3272,3273],{"class":184},"# ❌ 错误：把思维链塞回历史，下一轮上下文翻倍\n",[142,3275,3276,3279,3281,3283,3286,3288,3290,3293,3296],{"class":144,"line":162},[142,3277,3278],{"class":152},"history.append({",[142,3280,254],{"class":206},[142,3282,257],{"class":152},[142,3284,3285],{"class":206},"\"assistant\"",[142,3287,263],{"class":152},[142,3289,266],{"class":206},[142,3291,3292],{"class":152},": msg.reasoning_content ",[142,3294,3295],{"class":148},"+",[142,3297,3298],{"class":152}," msg.content})\n",[142,3300,3301],{"class":144,"line":174},[142,3302,178],{"emptyLinePlaceholder":177},[142,3304,3305],{"class":144,"line":181},[142,3306,3307],{"class":184},"# ✅ 正确：只保留最终答案\n",[142,3309,3310,3312,3314,3316,3318,3320,3322],{"class":144,"line":188},[142,3311,3278],{"class":152},[142,3313,254],{"class":206},[142,3315,257],{"class":152},[142,3317,3285],{"class":206},[142,3319,263],{"class":152},[142,3321,266],{"class":206},[142,3323,3324],{"class":152},": msg.content})\n",[30,3326,3327],{},"这点新手最容易踩——把思维链当成\"模型记忆\"塞回去，结果上下文成本飞涨且模型困惑。",[26,3329,492],{"id":492},[47,3331,3332,3340],{},[50,3333,3334],{},[53,3335,3336,3338],{},[56,3337,501],{},[56,3339,1362],{},[66,3341,3342,3349,3357],{},[53,3343,3344,3346],{},[71,3345,515],{},[71,3347,3348],{},"¥1 \u002F 百万 token",[53,3350,3351,3354],{},[71,3352,3353],{},"Input（缓存命中）",[71,3355,3356],{},"¥0.1 \u002F 百万 token",[53,3358,3359,3362],{},[71,3360,3361],{},"Output（含思维链）",[71,3363,3364],{},"¥4 \u002F 百万 token",[30,3366,3367,3368,126],{},"注意：R1 的 Output 价格高于 V3（¥2\u002FM），因为思维链 token 也计入 Output。实际使用中，思维链通常占 Output 的 50-80%，所以",[123,3369,3370],{},"实际成本约为 V3 的 3-4 倍",[30,3372,3373],{},"夜间折扣（00:30-08:30）同样适用，再 -50%。复杂数学批量任务定时跑夜间。",[26,3375,3377],{"id":3376},"r1-vs-v3-怎么选","R1 vs V3 怎么选",[47,3379,3380,3392],{},[50,3381,3382],{},[53,3383,3384,3386,3389],{},[56,3385,1451],{},[56,3387,3388],{},"R1（推理）",[56,3390,3391],{},"V3（通用）",[66,3393,3394,3403,3411,3420,3429,3440,3451],{},[53,3395,3396,3399,3401],{},[71,3397,3398],{},"数学\u002F逻辑",[71,3400,644],{},[71,3402,1601],{},[53,3404,3405,3407,3409],{},[71,3406,1471],{},[71,3408,692],{},[71,3410,692],{},[53,3412,3413,3416,3418],{},[71,3414,3415],{},"日常对话",[71,3417,1601],{},[71,3419,692],{},[53,3421,3422,3424,3427],{},[71,3423,629],{},[71,3425,3426],{},"慢（需推理）",[71,3428,1515],{},[53,3430,3431,3434,3437],{},[71,3432,3433],{},"实际成本",[71,3435,3436],{},"¥4\u002FM Output",[71,3438,3439],{},"¥2\u002FM Output",[53,3441,3442,3445,3448],{},[71,3443,3444],{},"多轮对话",[71,3446,3447],{},"麻烦（思维链要剥）",[71,3449,3450],{},"简单",[53,3452,3453,3455,3458],{},[71,3454,1772],{},[71,3456,3457],{},"❌ 不支持",[71,3459,1522],{},[30,3461,3462,2617],{},[123,3463,3464],{},"建议",[568,3466,3467,3470,3473],{},[571,3468,3469],{},"数学 \u002F 推理 \u002F 算法 → R1",[571,3471,3472],{},"编程 \u002F 对话 \u002F 批量 → V3",[571,3474,3475],{},"Agent 工具调用 → V3（R1 不支持 function calling）",[26,3477,3479],{"id":3478},"r1-vs-openai-o-series-claude-opus-thinking","R1 vs OpenAI o-series \u002F Claude Opus thinking",[47,3481,3482,3497],{},[50,3483,3484],{},[53,3485,3486,3488,3491,3494],{},[56,3487,1451],{},[56,3489,3490],{},"R1",[56,3492,3493],{},"OpenAI o-series（已并入 GPT-5）",[56,3495,3496],{},"Claude Opus 4 thinking",[66,3498,3499,3511,3522,3536,3547],{},[53,3500,3501,3503,3506,3509],{},[71,3502,2857],{},[71,3504,3505],{},"✅ 完整",[71,3507,3508],{},"部分（summary）",[71,3510,3505],{},[53,3512,3513,3515,3518,3520],{},[71,3514,2987],{},[71,3516,3517],{},"✅ MIT",[71,3519,2601],{},[71,3521,2601],{},[53,3523,3524,3527,3530,3533],{},[71,3525,3526],{},"价格 Output",[71,3528,3529],{},"¥4\u002FM",[71,3531,3532],{},"$10-$60\u002FM",[71,3534,3535],{},"$75\u002FM",[53,3537,3538,3541,3543,3545],{},[71,3539,3540],{},"数学（AIME）",[71,3542,2758],{},[71,3544,2803],{},[71,3546,2820],{},[53,3548,3549,3551,3553,3555],{},[71,3550,1772],{},[71,3552,2601],{},[71,3554,1522],{},[71,3556,1522],{},[30,3558,3559,3560,3563],{},"R1 的核心定位：",[123,3561,3562],{},"开源 + 思维链可见 + 价格极低","——研究、教育、蒸馏小模型的首选。",[26,3565,3566],{"id":3566},"适合场景",[30,3568,788],{},[568,3570,3571,3574,3577,3580,3583,3586],{},[571,3572,3573],{},"数学题求解 \u002F 证明",[571,3575,3576],{},"算法设计 \u002F 复杂逻辑推理",[571,3578,3579],{},"科学问题分析",[571,3581,3582],{},"学术研究 \u002F 论文公式推导",[571,3584,3585],{},"代码调试时的根因分析（让 R1 解释为什么 bug）",[571,3587,3588,3589,3593],{},"训练数据生成（用 R1 思维链做 SFT 数据，详见 ",[861,3590,3592],{"href":3591},"\u002Fwiki\u002Flora.html","LoRA","）",[30,3595,808],{},[568,3597,3598,3601,3604,3607],{},[571,3599,3600],{},"日常对话 \u002F 客服（太慢太贵）",[571,3602,3603],{},"Agent 工具调用（不支持）",[571,3605,3606],{},"实时聊天（首 token 等很久）",[571,3608,3609],{},"简单分类 \u002F 抽取（V3 \u002F Haiku 更划算）",[26,3611,825],{"id":825},[568,3613,3614,3620,3626,3632,3638],{},[571,3615,3616,3619],{},[123,3617,3618],{},"不支持 function calling","：R1 不能直接做 Agent 的工具调用层，只能做\"先推理再交给 V3 \u002F GPT 执行\"。",[571,3621,3622,3625],{},[123,3623,3624],{},"不要传 temperature","：R1 不支持采样参数，传了被忽略，不要从 V3 代码硬迁过来。",[571,3627,3628,3631],{},[123,3629,3630],{},"思维链不可缓存","：思维链每次重新生成，prompt cache 不能复用思维链——这是 R1 比 V3 贵的根本原因。",[571,3633,3634,3637],{},[123,3635,3636],{},"multi-turn 把思维链剥掉","：上面已强调，最常见的踩坑。",[571,3639,3640,3643],{},[123,3641,3642],{},"本地跑选蒸馏版","：满血 R1 部署门槛极高（同 V3），普通人用 R1-Distill-7B \u002F 14B 走 Ollama 即可体验。",[26,3645,854],{"id":854},[568,3647,3648,3654,3659,3664],{},[571,3649,3650,3651],{},"通用兄弟：",[861,3652,680],{"href":3653},"\u002Fmodels\u002Fdeepseek-v3.html",[571,3655,3656,3657],{},"推理模型概念：",[861,3658,1349],{"href":1348},[571,3660,3661,3662],{},"思维链与 Prompt：",[861,3663,1683],{"href":995},[571,3665,3666,3667,865,3670],{},"与同档对比：",[861,3668,1558],{"href":3669},"\u002Fmodels\u002Fgpt-5.html",[861,3671,869],{"href":868},[885,3673,3674],{},"html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":137,"searchDepth":174,"depth":174,"links":3676},[3677,3678,3683,3686,3687,3688,3689,3690,3691],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":3679},[3680,3681,3682],{"id":2777,"depth":174,"text":2777},{"id":2857,"depth":174,"text":2857},{"id":2987,"depth":174,"text":2987},{"id":129,"depth":162,"text":130,"children":3684},[3685],{"id":3250,"depth":174,"text":3251},{"id":492,"depth":162,"text":492},{"id":3376,"depth":162,"text":3377},{"id":3478,"depth":162,"text":3479},{"id":3566,"depth":162,"text":3566},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},128000,"深度求索 DeepSeek-R1 开源推理大模型，完整暴露思维链（Chain of Thought）推理过程，数学与代码推理能力对标 GPT-5\u002Fo3，API 输入仅 ¥1\u002FM 是 OpenAI 同级的 1\u002F30，国内可直连且权重开放，支持私有部署。",32768,{},"\u002Fmodels\u002Fdeepseek-r1","Input ¥1\u002FM (缓存 ¥0.1\u002FM) · Output ¥4\u002FM（含思维链）",[3699,3700],"coding\u002Fapi\u002Fopenrouter","coding\u002Flocal\u002Follama","2025-01-20",{"title":2750,"description":3693},"deepseek-r1","models\u002Fdeepseek-r1",[3706,3707,3708,3709,3710],"开源推理模型，思维链完全可见","数学推理能力接近 GPT-5","价格极低，推理模型中性价比最高","国内直连，响应快","可自行部署（开源权重）",[3712,3713,3714,3579],"数学竞赛 \u002F 证明题","复杂逻辑推理","算法设计","深度求索","DeepSeek",[3718,3719,3720,3721],"思维链 token 也计费，实际成本高于 V3","非推理任务不如 V3（速度更慢）","128K 上下文","输出含思维链，需额外解析","QPoX4cd8yNBbcGzFm19_QrMGr08pcUAycWyN8TOyN9I",{"id":3724,"title":680,"apiCompatible":3725,"benchmarks":3726,"body":3735,"category":907,"contextWindow":3692,"description":4524,"extension":910,"maxOutput":4525,"meta":4526,"navigation":177,"path":4527,"pricing":4528,"published":915,"relatedTools":4529,"releaseDate":3701,"seo":4531,"slug":4532,"stem":4533,"strengths":4534,"updated":915,"useCases":4539,"vendor":3715,"vendorEn":3716,"weaknesses":4544,"__hash__":4549},"models\u002Fmodels\u002Fdeepseek-v3.md",[2752],[3727,3728,3730,3732],{"name":14,"score":689},{"name":17,"score":3729},"88.5%",{"name":20,"score":3731},"84.1%",{"name":3733,"score":3734},"CMMLU","89.7%",{"type":23,"value":3736,"toc":4503},[3737,3739,3742,3744,3747,3750,3761,3765,3772,3834,3837,3839,3842,3867,3871,3878,3880,3883,3885,3888,4031,4035,4038,4082,4085,4089,4095,4097,4186,4188,4219,4222,4228,4231,4234,4284,4287,4289,4330,4333,4421,4423,4472,4474,4500],[26,3738,28],{"id":28},[30,3740,3741],{},"DeepSeek-V3 是深度求索于 2025 年 1 月发布的 671B 参数 MoE 模型，总参数 671B 但每次推理仅激活 37B。最大优势是极致性价比——API 价格是 Claude Sonnet 4 的 1\u002F20，且完全开源可自行部署。",[26,3743,35],{"id":35},[37,3745,3746],{"id":3746},"极致性价比",[30,3748,3749],{},"Input ¥1\u002FM token，Output ¥2\u002FM token，缓存命中后 Input 仅 ¥0.1\u002FM。这个价格意味着：",[568,3751,3752,3755,3758],{},[571,3753,3754],{},"100 万字中文处理成本约 ¥2",[571,3756,3757],{},"一个中型项目全量代码分析约 ¥5",[571,3759,3760],{},"批量处理 10 万条数据约 ¥20",[37,3762,3764],{"id":3763},"prompt-cache自动命中","Prompt Cache（自动命中）",[30,3766,3767,3768,3771],{},"DeepSeek 的 cache 完全自动——任何重复出现的 prompt 前缀（≥64 token）自动命中，Input 价格 -90%。响应里的 ",[139,3769,3770],{},"usage.prompt_cache_hit_tokens"," 字段显示命中量：",[132,3773,3775],{"className":134,"code":3774,"language":136,"meta":137,"style":137},"resp = client.chat.completions.create(\n    model=\"deepseek-chat\",\n    messages=[...],\n)\nprint(resp.usage.prompt_cache_hit_tokens)   # 命中 cache 的 input token 数\nprint(resp.usage.prompt_cache_miss_tokens)  # 没命中的\n",[139,3776,3777,3785,3796,3810,3814,3824],{"__ignoreMap":137},[142,3778,3779,3781,3783],{"class":144,"line":145},[142,3780,1007],{"class":152},[142,3782,168],{"class":148},[142,3784,2874],{"class":152},[142,3786,3787,3789,3791,3794],{"class":144,"line":162},[142,3788,201],{"class":200},[142,3790,168],{"class":148},[142,3792,3793],{"class":206},"\"deepseek-chat\"",[142,3795,210],{"class":152},[142,3797,3798,3800,3802,3805,3807],{"class":144,"line":174},[142,3799,246],{"class":200},[142,3801,168],{"class":148},[142,3803,3804],{"class":152},"[",[142,3806,1939],{"class":220},[142,3808,3809],{"class":152},"],\n",[142,3811,3812],{"class":144,"line":181},[142,3813,480],{"class":152},[142,3815,3816,3818,3821],{"class":144,"line":188},[142,3817,1330],{"class":220},[142,3819,3820],{"class":152},"(resp.usage.prompt_cache_hit_tokens)   ",[142,3822,3823],{"class":184},"# 命中 cache 的 input token 数\n",[142,3825,3826,3828,3831],{"class":144,"line":197},[142,3827,1330],{"class":220},[142,3829,3830],{"class":152},"(resp.usage.prompt_cache_miss_tokens)  ",[142,3832,3833],{"class":184},"# 没命中的\n",[30,3835,3836],{},"实测：固定 system prompt + 工具定义共 5000 token，cache 命中后单次调用 input 成本从 ¥0.005 降到 ¥0.0005——批量场景一晚省好几张百元钞。",[37,3838,2987],{"id":2987},[30,3840,3841],{},"模型权重完全开源（MIT 协议），可以在自己的 GPU 上部署：",[568,3843,3844,3847,3850,3860],{},[571,3845,3846],{},"满血版 FP8：8×H100 (~640GB 显存)",[571,3848,3849],{},"量化版 INT4：2×H100 或 4×A100",[571,3851,3852,3853,3859],{},"通过 ",[861,3854,3858],{"href":3855,"rel":3856},"https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm",[3857],"nofollow","vLLM"," \u002F SGLang 高性能推理",[571,3861,3852,3862,3866],{},[861,3863,3865],{"href":3864},"\u002Fcoding\u002Flocal\u002Follama.html","Ollama"," 体验（推荐 70B 蒸馏版，本地能跑）",[37,3868,3870],{"id":3869},"moe-架构特性","MoE 架构特性",[30,3872,3873,3874,3877],{},"671B 总参数 \u002F 37B 激活——推理时只激活 37B，速度接近 37B 模型，质量接近 671B 模型。但显存要求按总参数算（必须把 671B 都加载进显存），所以",[123,3875,3876],{},"自己部署门槛极高","。绝大多数人通过 API 用即可。",[37,3879,1471],{"id":1471},[30,3881,3882],{},"SWE-bench Verified 61.2%，接近第一梯队。在 Cursor、Aider 等工具中通过 OpenAI 兼容 API 接入，体验接近 Claude Sonnet 4 的 80% 水平。",[26,3884,130],{"id":129},[30,3886,3887],{},"DeepSeek 提供 OpenAI 兼容 API：",[132,3889,3891],{"className":134,"code":3890,"language":136,"meta":137,"style":137},"from openai import OpenAI\nclient = OpenAI(\n    api_key=\"sk-...\",\n    base_url=\"https:\u002F\u002Fapi.deepseek.com\u002Fv1\",\n)\n\nresp = client.chat.completions.create(\n    model=\"deepseek-chat\",        # V3 别名（推理用 deepseek-reasoner = R1）\n    temperature=0.0,\n    messages=[\n        {\"role\": \"system\", \"content\": \"你是 Python 高级工程师。\"},\n        {\"role\": \"user\", \"content\": \"Review this code...\"},\n    ],\n)\n",[139,3892,3893,3903,3911,3921,3931,3935,3939,3947,3961,3972,3980,4002,4023,4027],{"__ignoreMap":137},[142,3894,3895,3897,3899,3901],{"class":144,"line":145},[142,3896,149],{"class":148},[142,3898,3061],{"class":152},[142,3900,156],{"class":148},[142,3902,3066],{"class":152},[142,3904,3905,3907,3909],{"class":144,"line":162},[142,3906,165],{"class":152},[142,3908,168],{"class":148},[142,3910,3075],{"class":152},[142,3912,3913,3915,3917,3919],{"class":144,"line":174},[142,3914,3080],{"class":200},[142,3916,168],{"class":148},[142,3918,3085],{"class":206},[142,3920,210],{"class":152},[142,3922,3923,3925,3927,3929],{"class":144,"line":181},[142,3924,3092],{"class":200},[142,3926,168],{"class":148},[142,3928,3097],{"class":206},[142,3930,210],{"class":152},[142,3932,3933],{"class":144,"line":188},[142,3934,480],{"class":152},[142,3936,3937],{"class":144,"line":197},[142,3938,178],{"emptyLinePlaceholder":177},[142,3940,3941,3943,3945],{"class":144,"line":5},[142,3942,1007],{"class":152},[142,3944,168],{"class":148},[142,3946,2874],{"class":152},[142,3948,3949,3951,3953,3955,3958],{"class":144,"line":230},[142,3950,201],{"class":200},[142,3952,168],{"class":148},[142,3954,3793],{"class":206},[142,3956,3957],{"class":152},",        ",[142,3959,3960],{"class":184},"# V3 别名（推理用 deepseek-reasoner = R1）\n",[142,3962,3963,3965,3967,3970],{"class":144,"line":243},[142,3964,233],{"class":200},[142,3966,168],{"class":148},[142,3968,3969],{"class":220},"0.0",[142,3971,210],{"class":152},[142,3973,3974,3976,3978],{"class":144,"line":272},[142,3975,246],{"class":200},[142,3977,168],{"class":148},[142,3979,342],{"class":152},[142,3981,3982,3984,3986,3988,3991,3993,3995,3997,4000],{"class":144,"line":284},[142,3983,1292],{"class":152},[142,3985,254],{"class":206},[142,3987,257],{"class":152},[142,3989,3990],{"class":206},"\"system\"",[142,3992,263],{"class":152},[142,3994,266],{"class":206},[142,3996,257],{"class":152},[142,3998,3999],{"class":206},"\"你是 Python 高级工程师。\"",[142,4001,1064],{"class":152},[142,4003,4004,4006,4008,4010,4012,4014,4016,4018,4021],{"class":144,"line":299},[142,4005,1292],{"class":152},[142,4007,254],{"class":206},[142,4009,257],{"class":152},[142,4011,260],{"class":206},[142,4013,263],{"class":152},[142,4015,266],{"class":206},[142,4017,257],{"class":152},[142,4019,4020],{"class":206},"\"Review this code...\"",[142,4022,1064],{"class":152},[142,4024,4025],{"class":144,"line":471},[142,4026,1321],{"class":152},[142,4028,4029],{"class":144,"line":477},[142,4030,480],{"class":152},[37,4032,4034],{"id":4033},"与-aider-配合用","与 Aider 配合用",[30,4036,4037],{},"Aider 是用 DeepSeek-V3 最方便的 CLI 之一：",[132,4039,4043],{"className":4040,"code":4041,"language":4042,"meta":137,"style":137},"language-bash shiki shiki-themes github-light github-dark","export OPENAI_API_KEY=sk-...\nexport OPENAI_API_BASE=https:\u002F\u002Fapi.deepseek.com\u002Fv1\naider --model deepseek-chat\n","bash",[139,4044,4045,4058,4070],{"__ignoreMap":137},[142,4046,4047,4050,4053,4055],{"class":144,"line":145},[142,4048,4049],{"class":148},"export",[142,4051,4052],{"class":152}," OPENAI_API_KEY",[142,4054,168],{"class":148},[142,4056,4057],{"class":152},"sk-...\n",[142,4059,4060,4062,4065,4067],{"class":144,"line":162},[142,4061,4049],{"class":148},[142,4063,4064],{"class":152}," OPENAI_API_BASE",[142,4066,168],{"class":148},[142,4068,4069],{"class":152},"https:\u002F\u002Fapi.deepseek.com\u002Fv1\n",[142,4071,4072,4076,4079],{"class":144,"line":174},[142,4073,4075],{"class":4074},"sScJk","aider",[142,4077,4078],{"class":220}," --model",[142,4080,4081],{"class":206}," deepseek-chat\n",[30,4083,4084],{},"成本：一晚做完一个中型 feature 通常 ¥1-3，是用 Claude 的 1\u002F20。",[37,4086,4088],{"id":4087},"在-cursor-中接入","在 Cursor 中接入",[132,4090,4093],{"className":4091,"code":4092,"language":435},[1538],"Cursor → Settings → Models → Add Model\n  Provider: OpenAI\n  Base URL: https:\u002F\u002Fapi.deepseek.com\u002Fv1\n  Model: deepseek-chat\n  API Key: sk-...\n",[139,4094,4092],{"__ignoreMap":137},[26,4096,2222],{"id":2222},[47,4098,4099,4110],{},[50,4100,4101],{},[53,4102,4103,4105,4107],{},[56,4104,2231],{},[56,4106,713],{},[56,4108,4109],{},"说明",[66,4111,4112,4124,4134,4147,4159,4171],{},[53,4113,4114,4118,4121],{},[71,4115,4116],{},[139,4117,1344],{},[71,4119,4120],{},"0.0-0.3",[71,4122,4123],{},"编程 \u002F 工具调用",[53,4125,4126,4130,4132],{},[71,4127,4128],{},[139,4129,1344],{},[71,4131,1273],{},[71,4133,2261],{},[53,4135,4136,4141,4144],{},[71,4137,4138],{},[139,4139,4140],{},"top_p",[71,4142,4143],{},"0.95",[71,4145,4146],{},"DeepSeek 默认",[53,4148,4149,4153,4156],{},[71,4150,4151],{},[139,4152,2279],{},[71,4154,4155],{},"显式设",[71,4157,4158],{},"默认 4K，长输出务必调高（上限 8K）",[53,4160,4161,4166,4168],{},[71,4162,4163],{},[139,4164,4165],{},"frequency_penalty",[71,4167,238],{},[71,4169,4170],{},"一般不动",[53,4172,4173,4178,4183],{},[71,4174,4175],{},[139,4176,4177],{},"response_format",[71,4179,4180],{},[139,4181,4182],{},"{\"type\": \"json_object\"}",[71,4184,4185],{},"JSON 模式",[26,4187,492],{"id":492},[47,4189,4190,4198],{},[50,4191,4192],{},[53,4193,4194,4196],{},[56,4195,501],{},[56,4197,1362],{},[66,4199,4200,4206,4212],{},[53,4201,4202,4204],{},[71,4203,515],{},[71,4205,3348],{},[53,4207,4208,4210],{},[71,4209,3353],{},[71,4211,3356],{},[53,4213,4214,4216],{},[71,4215,529],{},[71,4217,4218],{},"¥2 \u002F 百万 token",[30,4220,4221],{},"这个价格是 GLM-5.2 的一半，是 Claude Sonnet 4 的 1\u002F20。",[30,4223,4224,4227],{},[123,4225,4226],{},"夜间折扣","：北京时间 00:30-08:30，所有价格再 -50%。批量数据处理可以定时跑在夜间。",[26,4229,4230],{"id":4230},"自行部署",[30,4232,4233],{},"如果数据敏感不能上云：",[47,4235,4236,4249],{},[50,4237,4238],{},[53,4239,4240,4243,4246],{},[56,4241,4242],{},"配置",[56,4244,4245],{},"性能",[56,4247,4248],{},"成本",[66,4250,4251,4262,4273],{},[53,4252,4253,4256,4259],{},[71,4254,4255],{},"8×H100 FP8 满血",[71,4257,4258],{},"~50 tok\u002Fs 单并发",[71,4260,4261],{},"~¥30-50 万\u002F月（云租赁）",[53,4263,4264,4267,4270],{},[71,4265,4266],{},"4×H100 INT4 量化",[71,4268,4269],{},"~30 tok\u002Fs 单并发",[71,4271,4272],{},"~¥15-25 万\u002F月",[53,4274,4275,4278,4281],{},[71,4276,4277],{},"7B\u002F13B 蒸馏版（Ollama）",[71,4279,4280],{},"笔记本可跑",[71,4282,4283],{},"几乎免费",[30,4285,4286],{},"蒸馏版是 Meta 把 V3 的输出蒸馏到 Llama \u002F Qwen 上的小模型，能力差距明显但本地能跑。",[26,4288,2458],{"id":2458},[568,4290,4291,4297,4303,4318,4324],{},[571,4292,4293,4296],{},[123,4294,4295],{},"批量处理","：价格极低，适合大规模文本分类、摘要、翻译",[571,4298,4299,4302],{},[123,4300,4301],{},"私有化部署","：开源协议允许商用，企业可在自有 GPU 上部署",[571,4304,4305,4308,4309,865,4313,4317],{},[123,4306,4307],{},"编程辅助","：通过 API 接入 ",[861,4310,4312],{"href":4311},"\u002Fcoding\u002Fide\u002Fcursor.html","Cursor",[861,4314,4316],{"href":4315},"\u002Fcoding\u002Fcli\u002Faider.html","Aider","，低成本替代 Claude",[571,4319,4320,4323],{},[123,4321,4322],{},"研究实验","：开源权重可用于学术研究和模型微调",[571,4325,4326,4329],{},[123,4327,4328],{},"后端模型","：Coze \u002F Dify \u002F 自建 Agent 平台的低成本后端",[26,4331,4332],{"id":4332},"与同档对比",[47,4334,4335,4348],{},[50,4336,4337],{},[53,4338,4339,4341,4343,4345],{},[56,4340,1451],{},[56,4342,680],{},[56,4344,2504],{},[56,4346,4347],{},"Qwen 3",[66,4349,4350,4363,4374,4384,4396,4409],{},[53,4351,4352,4354,4357,4360],{},[71,4353,1362],{},[71,4355,4356],{},"¥1\u002F¥2",[71,4358,4359],{},"¥2\u002F¥6",[71,4361,4362],{},"¥0.8\u002F¥2",[53,4364,4365,4367,4369,4371],{},[71,4366,626],{},[71,4368,689],{},[71,4370,2521],{},[71,4372,4373],{},"58.4%",[53,4375,4376,4378,4380,4382],{},[71,4377,2526],{},[71,4379,2537],{},[71,4381,2537],{},[71,4383,2537],{},[53,4385,4386,4388,4391,4393],{},[71,4387,1499],{},[71,4389,4390],{},"8K（短）",[71,4392,1502],{},[71,4394,4395],{},"16K",[53,4397,4398,4400,4403,4406],{},[71,4399,2987],{},[71,4401,4402],{},"✅ 完全",[71,4404,4405],{},"部分",[71,4407,4408],{},"✅ 全系列",[53,4410,4411,4414,4417,4419],{},[71,4412,4413],{},"缓存折扣",[71,4415,4416],{},"✅ 自动 -90%",[71,4418,1522],{},[71,4420,1522],{},[26,4422,825],{"id":825},[568,4424,4425,4431,4448,4454,4460],{},[571,4426,4427,4430],{},[123,4428,4429],{},"8K 输出窗口最短","：长文件 \u002F 长报告生成会被截断。需要 32K+ 输出选 GLM-5.2 或 Qwen。",[571,4432,4433,4442,4443,4447],{},[123,4434,4435,4438,4439],{},[139,4436,4437],{},"deepseek-chat"," vs ",[139,4440,4441],{},"deepseek-reasoner","：前者是 V3 通用，后者是 ",[861,4444,4446],{"href":4445},"\u002Fmodels\u002Fdeepseek-r1.html","R1 推理模型","。别选错。",[571,4449,4450,4453],{},[123,4451,4452],{},"MoE 部署门槛","：想自部署\"满血版\"必须有 8×H100，否则别想，老老实实用 API。",[571,4455,4456,4459],{},[123,4457,4458],{},"dev 服务器并发","：免费\u002F低 tier RPM 限制较紧，生产前务必充值升级。",[571,4461,4462,4465,4466,4468,4469,4471],{},[123,4463,4464],{},"思维链不要直接喂回去","：deepseek-chat 不输出思维链，但 reasoner 会。Multi-turn 时要把 ",[139,4467,2957],{}," 字段剥掉，只把 ",[139,4470,3263],{}," 作为 assistant 历史，否则下一轮上下文翻倍。",[26,4473,854],{"id":854},[568,4475,4476,4481,4490,4495],{},[571,4477,4478,4479],{},"推理兄弟：",[861,4480,2750],{"href":4445},[571,4482,4483,4484,865,4487],{},"同档国产：",[861,4485,2504],{"href":4486},"\u002Fmodels\u002Fglm-5.2.html",[861,4488,4347],{"href":4489},"\u002Fmodels\u002Fqwen-3.html",[571,4491,4492,4493],{},"工具集成：",[861,4494,2676],{"href":1771},[571,4496,4497,4498],{},"成本控制：",[861,4499,876],{"href":875},[885,4501,4502],{},"html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}",{"title":137,"searchDepth":174,"depth":174,"links":4504},[4505,4506,4513,4517,4518,4519,4520,4521,4522,4523],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":4507},[4508,4509,4510,4511,4512],{"id":3746,"depth":174,"text":3746},{"id":3763,"depth":174,"text":3764},{"id":2987,"depth":174,"text":2987},{"id":3869,"depth":174,"text":3870},{"id":1471,"depth":174,"text":1471},{"id":129,"depth":162,"text":130,"children":4514},[4515,4516],{"id":4033,"depth":174,"text":4034},{"id":4087,"depth":174,"text":4088},{"id":2222,"depth":162,"text":2222},{"id":492,"depth":162,"text":492},{"id":4230,"depth":162,"text":4230},{"id":2458,"depth":162,"text":2458},{"id":4332,"depth":162,"text":4332},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"深度求索 DeepSeek-V3 开源 MoE 大模型，671B 总参数 \u002F 37B 激活，编程与数学逼近 GPT-4o 第一梯队，API 输入 ¥1\u002FM 全网最低，权重 MIT 协议开放，国内直连无延迟，支持私有化部署。",8192,{},"\u002Fmodels\u002Fdeepseek-v3","Input ¥1\u002FM (缓存 ¥0.1\u002FM) · Output ¥2\u002FM",[3699,3700,4530],"coding\u002Fcli\u002Faider",{"title":680,"description":4524},"deepseek-v3","models\u002Fdeepseek-v3",[4535,4536,4537,4538,3709],"开源 671B MoE 模型，可自行部署","API 价格全网最低，比 GLM-5.2 还便宜","编程能力接近第一梯队","支持思维链（Chain-of-Thought）推理",[4540,4541,4542,4543],"低成本高吞吐 API 调用","私有化部署（开源版本）","编程辅助（通过 API 接入 Cursor 等）","中文 NLP 任务",[4545,4546,4547,4548],"8K 输出窗口偏短，长文件生成受限","多步 Agent 场景下稳定性不如 Claude","非编程场景的推理能力略逊 GPT-5","MoE 架构推理部署需要较大显存","bCGleH0DdzspozZcCiOexWK4qL1hRuqerBA0rk8MxcE",{"id":4551,"title":4552,"apiCompatible":4553,"benchmarks":4554,"body":4561,"category":907,"contextWindow":5397,"description":5398,"extension":910,"maxOutput":911,"meta":5399,"navigation":177,"path":5400,"pricing":5401,"published":915,"relatedTools":5402,"releaseDate":5404,"seo":5405,"slug":5344,"stem":5406,"strengths":5407,"updated":915,"useCases":5413,"vendor":5417,"vendorEn":5418,"weaknesses":5419,"__hash__":5424},"models\u002Fmodels\u002Fdoubao-1-5-pro.md","Doubao 1.5 Pro",[2752],[4555,4557,4559],{"name":17,"score":4556},"78.3%",{"name":20,"score":4558},"77.6%",{"name":3733,"score":4560},"86.2%",{"type":23,"value":4562,"toc":5379},[4563,4565,4568,4574,4576,4580,4583,4630,4633,4647,4650,4653,4727,4730,4733,4736,4753,4755,4758,4772,4775,4777,4780,4907,4921,4924,5009,5018,5020,5053,5059,5062,5064,5106,5109,5145,5149,5152,5158,5161,5165,5293,5295,5346,5348,5376],[26,4564,28],{"id":28},[30,4566,4567],{},"Doubao 1.5 Pro 是字节跳动于 2025 年 6 月发布的旗舰模型。最大优势是字节生态集成——飞书、抖音、剪映等字节系产品原生支持豆包模型，企业用户在火山引擎上一键调用。",[30,4569,4570,4571,126],{},"豆包的策略路线很清晰：",[123,4572,4573],{},"不与 GLM \u002F DeepSeek 抢编程市场，而是吃字节生态的\"内场\"——飞书办公场景、抖音内容场景、Coze 平台后端",[26,4575,35],{"id":35},[37,4577,4579],{"id":4578},"_256k-上下文","256K 上下文",[30,4581,4582],{},"在国产模型中上下文窗口较长：",[47,4584,4585,4593],{},[50,4586,4587],{},[53,4588,4589,4591],{},[56,4590,619],{},[56,4592,2526],{},[66,4594,4595,4602,4608,4614,4622],{},[53,4596,4597,4599],{},[71,4598,4552],{},[71,4600,4601],{},"256K",[53,4603,4604,4606],{},[71,4605,2504],{},[71,4607,2537],{},[53,4609,4610,4612],{},[71,4611,680],{},[71,4613,2537],{},[53,4615,4616,4619],{},[71,4617,4618],{},"Kimi K2",[71,4620,4621],{},"128K-256K",[53,4623,4624,4627],{},[71,4625,4626],{},"Qwen-Long",[71,4628,4629],{},"10M",[30,4631,4632],{},"适合处理：",[568,4634,4635,4638,4641,4644],{},[571,4636,4637],{},"中型项目全量代码",[571,4639,4640],{},"长篇法律文档",[571,4642,4643],{},"多份合同对比",[571,4645,4646],{},"完整学术论文",[37,4648,4649],{"id":4649},"字节生态",[30,4651,4652],{},"豆包深度集成字节系产品：",[47,4654,4655,4665],{},[50,4656,4657],{},[53,4658,4659,4662],{},[56,4660,4661],{},"产品",[56,4663,4664],{},"集成方式",[66,4666,4667,4677,4687,4697,4707,4717],{},[53,4668,4669,4674],{},[71,4670,4671],{},[123,4672,4673],{},"飞书",[71,4675,4676],{},"智能助手、会议纪要、文档摘要、自动回复",[53,4678,4679,4684],{},[71,4680,4681],{},[123,4682,4683],{},"抖音",[71,4685,4686],{},"内容审核、推荐算法辅助、评论分析",[53,4688,4689,4694],{},[71,4690,4691],{},[123,4692,4693],{},"剪映",[71,4695,4696],{},"视频脚本生成、字幕翻译、智能剪辑",[53,4698,4699,4704],{},[71,4700,4701],{},[123,4702,4703],{},"Coze（扣子）",[71,4705,4706],{},"Agent 平台默认后端模型",[53,4708,4709,4714],{},[71,4710,4711],{},[123,4712,4713],{},"Trae",[71,4715,4716],{},"AI IDE 默认模型之一",[53,4718,4719,4724],{},[71,4720,4721],{},[123,4722,4723],{},"巨量引擎",[71,4725,4726],{},"广告文案生成",[30,4728,4729],{},"如果企业已经在用飞书办公，接入豆包做智能助手是最自然的——SSO 打通、数据合规一站搞定。",[37,4731,4732],{"id":4732},"火山引擎",[30,4734,4735],{},"通过火山引擎 API 调用，企业级 SLA 保障：",[568,4737,4738,4741,4744,4747,4750],{},[571,4739,4740],{},"99.95% 可用性",[571,4742,4743],{},"低延迟（国内 \u003C 50ms）",[571,4745,4746],{},"支持私有部署（大客户）",[571,4748,4749],{},"与字节其他云服务（数据库、对象存储）打通",[571,4751,4752],{},"合规备案齐全",[37,4754,2568],{"id":2568},[30,4756,4757],{},"支持图片输入，图片理解能力可处理：",[568,4759,4760,4763,4766,4769],{},[571,4761,4762],{},"OCR 文档识别",[571,4764,4765],{},"商品图分析",[571,4767,4768],{},"表格图理解",[571,4770,4771],{},"UI 截图分析",[30,4773,4774],{},"视频理解能力（豆包视频版）也在持续追赶 Gemini，目前在国产中视频 + 多模态做得最好的之一。",[26,4776,130],{"id":129},[30,4778,4779],{},"豆包通过火山引擎方舟（Ark）平台调用，提供 OpenAI 兼容接口：",[132,4781,4783],{"className":134,"code":4782,"language":136,"meta":137,"style":137},"from openai import OpenAI\n\nclient = OpenAI(\n    api_key=\"...\",\n    base_url=\"https:\u002F\u002Fark.cn-beijing.volces.com\u002Fapi\u002Fv3\",\n)\n\nresp = client.chat.completions.create(\n    model=\"ep-xxxxxxxxx\",     # 你的端点 ID（在火山控制台创建）\n    temperature=0.3,\n    messages=[\n        {\"role\": \"user\", \"content\": \"...\"},\n    ],\n)\n",[139,4784,4785,4795,4799,4807,4818,4829,4833,4837,4845,4860,4871,4879,4899,4903],{"__ignoreMap":137},[142,4786,4787,4789,4791,4793],{"class":144,"line":145},[142,4788,149],{"class":148},[142,4790,3061],{"class":152},[142,4792,156],{"class":148},[142,4794,3066],{"class":152},[142,4796,4797],{"class":144,"line":162},[142,4798,178],{"emptyLinePlaceholder":177},[142,4800,4801,4803,4805],{"class":144,"line":174},[142,4802,165],{"class":152},[142,4804,168],{"class":148},[142,4806,3075],{"class":152},[142,4808,4809,4811,4813,4816],{"class":144,"line":181},[142,4810,3080],{"class":200},[142,4812,168],{"class":148},[142,4814,4815],{"class":206},"\"...\"",[142,4817,210],{"class":152},[142,4819,4820,4822,4824,4827],{"class":144,"line":188},[142,4821,3092],{"class":200},[142,4823,168],{"class":148},[142,4825,4826],{"class":206},"\"https:\u002F\u002Fark.cn-beijing.volces.com\u002Fapi\u002Fv3\"",[142,4828,210],{"class":152},[142,4830,4831],{"class":144,"line":197},[142,4832,480],{"class":152},[142,4834,4835],{"class":144,"line":5},[142,4836,178],{"emptyLinePlaceholder":177},[142,4838,4839,4841,4843],{"class":144,"line":230},[142,4840,1007],{"class":152},[142,4842,168],{"class":148},[142,4844,2874],{"class":152},[142,4846,4847,4849,4851,4854,4857],{"class":144,"line":243},[142,4848,201],{"class":200},[142,4850,168],{"class":148},[142,4852,4853],{"class":206},"\"ep-xxxxxxxxx\"",[142,4855,4856],{"class":152},",     ",[142,4858,4859],{"class":184},"# 你的端点 ID（在火山控制台创建）\n",[142,4861,4862,4864,4866,4869],{"class":144,"line":272},[142,4863,233],{"class":200},[142,4865,168],{"class":148},[142,4867,4868],{"class":220},"0.3",[142,4870,210],{"class":152},[142,4872,4873,4875,4877],{"class":144,"line":284},[142,4874,246],{"class":200},[142,4876,168],{"class":148},[142,4878,342],{"class":152},[142,4880,4881,4883,4885,4887,4889,4891,4893,4895,4897],{"class":144,"line":299},[142,4882,1292],{"class":152},[142,4884,254],{"class":206},[142,4886,257],{"class":152},[142,4888,260],{"class":206},[142,4890,263],{"class":152},[142,4892,266],{"class":206},[142,4894,257],{"class":152},[142,4896,4815],{"class":206},[142,4898,1064],{"class":152},[142,4900,4901],{"class":144,"line":471},[142,4902,1321],{"class":152},[142,4904,4905],{"class":144,"line":477},[142,4906,480],{"class":152},[30,4908,4909,4912,4913,4916,4917,4920],{},[123,4910,4911],{},"注意","：豆包的 ",[139,4914,4915],{},"model"," 参数不是模型名称，而是",[123,4918,4919],{},"端点 ID","（endpoint ID）——你需要先在火山控制台为某个模型创建一个端点，然后用端点 ID 调用。这种设计的好处是端点级别可以做版本灰度、QPS 控制、计费归属，企业场景更灵活。",[37,4922,4923],{"id":4923},"模型家族",[47,4925,4926,4939],{},[50,4927,4928],{},[53,4929,4930,4932,4935,4937],{},[56,4931,619],{},[56,4933,4934],{},"定位",[56,4936,515],{},[56,4938,529],{},[66,4940,4941,4954,4968,4982,4995],{},[53,4942,4943,4946,4949,4952],{},[71,4944,4945],{},"doubao-1-5-pro-256k",[71,4947,4948],{},"旗舰",[71,4950,4951],{},"¥0.8\u002FM",[71,4953,686],{},[53,4955,4956,4959,4962,4965],{},[71,4957,4958],{},"doubao-1-5-pro-32k",[71,4960,4961],{},"标准",[71,4963,4964],{},"¥0.3\u002FM",[71,4966,4967],{},"¥0.6\u002FM",[53,4969,4970,4973,4976,4979],{},[71,4971,4972],{},"doubao-1-5-lite-32k",[71,4974,4975],{},"轻量",[71,4977,4978],{},"¥0.15\u002FM",[71,4980,4981],{},"¥0.45\u002FM",[53,4983,4984,4987,4989,4992],{},[71,4985,4986],{},"doubao-vision-pro",[71,4988,2568],{},[71,4990,4991],{},"¥3\u002FM",[71,4993,4994],{},"¥9\u002FM",[53,4996,4997,5000,5003,5006],{},[71,4998,4999],{},"doubao-embedding",[71,5001,5002],{},"向量",[71,5004,5005],{},"¥0.5\u002FM",[71,5007,5008],{},"—",[30,5010,5011,5013,5014,5017],{},[139,5012,4972],{}," 是国产里非常具竞争力的轻量选项——便宜过 ",[861,5015,649],{"href":5016},"\u002Fmodels\u002Fgpt-4o.html","，质量接近 GLM Air。",[26,5019,492],{"id":492},[47,5021,5022,5030],{},[50,5023,5024],{},[53,5025,5026,5028],{},[56,5027,501],{},[56,5029,1362],{},[66,5031,5032,5039,5045],{},[53,5033,5034,5036],{},[71,5035,515],{},[71,5037,5038],{},"¥0.8 \u002F 百万 token",[53,5040,5041,5043],{},[71,5042,529],{},[71,5044,4218],{},[53,5046,5047,5050],{},[71,5048,5049],{},"免费额度",[71,5051,5052],{},"个人开发者每月一定额度",[30,5054,5055,5056,5058],{},"与 ",[861,5057,4347],{"href":4489}," 持平，是国产模型中第二便宜的旗舰模型。",[26,5060,5061],{"id":5061},"适合什么场景",[30,5063,788],{},[568,5065,5066,5072,5078,5084,5089,5095,5101],{},[571,5067,5068,5071],{},[123,5069,5070],{},"字节系企业","：已用飞书\u002F抖音\u002F火山引擎，用豆包最自然",[571,5073,5074,5077],{},[123,5075,5076],{},"企业客服","：火山引擎 SLA 稳定，价格低",[571,5079,5080,5083],{},[123,5081,5082],{},"中文内容生成","：中文能力不错，价格低",[571,5085,5086,5088],{},[123,5087,2568],{},"：图片理解能力可满足基本需求",[571,5090,5091,5094],{},[123,5092,5093],{},"Coze 平台 Agent","：作为默认后端模型集成最深",[571,5096,5097,5100],{},[123,5098,5099],{},"抖音电商应用","：商品描述生成、评论分析、智能客服",[571,5102,5103,5105],{},[123,5104,4301],{},"：火山引擎专属实例（大客户）",[30,5107,5108],{},"❌ 不适合什么场景：",[568,5110,5111,5121,5127,5133,5139],{},[571,5112,5113,5116,5117,865,5119],{},[123,5114,5115],{},"编程主力","：能力不如 ",[861,5118,864],{"href":863},[861,5120,2504],{"href":4486},[571,5122,5123,5126],{},[123,5124,5125],{},"英文场景","：英文能力一般，跨境出海应用建议 Qwen",[571,5128,5129,5132],{},[123,5130,5131],{},"私有化研究 \u002F 微调","：不开源，无法在自有 GPU 上微调",[571,5134,5135,5138],{},[123,5136,5137],{},"社区生态","：不如 Llama\u002FQwen 开源生态丰富",[571,5140,5141,5144],{},[123,5142,5143],{},"复杂 Agent 多步推理","：稳定性不如 Claude",[26,5146,5148],{"id":5147},"实战与-coze-配合","实战：与 Coze 配合",[30,5150,5151],{},"豆包 + Coze 是字节给非技术人员设计的\"零代码 AI 应用\"组合：",[132,5153,5156],{"className":5154,"code":5155,"language":435},[1538],"[Coze 工作流]\n  用户问题\n    ↓\n  豆包模型分析意图\n    ↓\n  调用工具节点（搜索\u002F数据库\u002FAPI）\n    ↓\n  豆包生成最终回答\n    ↓\n  发送到飞书\u002F微信\u002F网页\n",[139,5157,5155],{"__ignoreMap":137},[30,5159,5160],{},"PM \u002F 运营 \u002F 客服主管不写代码就能搭出企业级 AI 应用，背后跑的就是豆包。",[26,5162,5164],{"id":5163},"doubao-vs-同档国产对比","Doubao vs 同档国产对比",[47,5166,5167,5181],{},[50,5168,5169],{},[53,5170,5171,5173,5175,5177,5179],{},[56,5172,1451],{},[56,5174,4552],{},[56,5176,2504],{},[56,5178,680],{},[56,5180,4347],{},[66,5182,5183,5197,5211,5223,5236,5248,5265,5277],{},[53,5184,5185,5187,5189,5192,5194],{},[71,5186,17],{},[71,5188,4556],{},[71,5190,5191],{},"91.2%",[71,5193,3729],{},[71,5195,5196],{},"86.7%",[53,5198,5199,5201,5203,5206,5208],{},[71,5200,3733],{},[71,5202,4560],{},[71,5204,5205],{},"92.3%",[71,5207,3734],{},[71,5209,5210],{},"88.9%",[53,5212,5213,5215,5217,5219,5221],{},[71,5214,2526],{},[71,5216,4601],{},[71,5218,2537],{},[71,5220,2537],{},[71,5222,2537],{},[53,5224,5225,5228,5230,5232,5234],{},[71,5226,5227],{},"价格 Input",[71,5229,4951],{},[71,5231,686],{},[71,5233,683],{},[71,5235,4951],{},[53,5237,5238,5240,5242,5244,5246],{},[71,5239,2987],{},[71,5241,2601],{},[71,5243,4405],{},[71,5245,1522],{},[71,5247,4408],{},[53,5249,5250,5253,5256,5259,5262],{},[71,5251,5252],{},"生态集成",[71,5254,5255],{},"字节全家桶",[71,5257,5258],{},"智谱独立",[71,5260,5261],{},"独立",[71,5263,5264],{},"阿里云全套",[53,5266,5267,5269,5271,5273,5275],{},[71,5268,2568],{},[71,5270,1522],{},[71,5272,1522],{},[71,5274,2601],{},[71,5276,1522],{},[53,5278,5279,5282,5285,5288,5290],{},[71,5280,5281],{},"Agent 平台",[71,5283,5284],{},"Coze（强）",[71,5286,5287],{},"元器",[71,5289,5261],{},[71,5291,5292],{},"百炼",[26,5294,825],{"id":825},[568,5296,5297,5313,5322,5328,5334],{},[571,5298,5299,5302,5303,5305,5306,5308,5309,5312],{},[123,5300,5301],{},"端点 ID 不是模型名","：很多开发者第一次接入时把模型名（如 ",[139,5304,4945],{},"）直接传给 ",[139,5307,4915],{}," 参数，会报错。必须先在控制台创建端点，用端点 ID（",[139,5310,5311],{},"ep-xxxxxxxxx","）调用。",[571,5314,5315,5321],{},[123,5316,5317,5320],{},[139,5318,5319],{},"base_url"," 注意区域","：北京 \u002F 上海等不同区域 URL 不同，企业需选最近 region。",[571,5323,5324,5327],{},[123,5325,5326],{},"私有部署门槛高","：要求年消费达标，不是小客户能用上的。",[571,5329,5330,5333],{},[123,5331,5332],{},"不要拿它写代码","：HumanEval 78.3% 远低于 GLM-5.2 \u002F DeepSeek-V3 \u002F Qwen3，AI 编程场景换模型。",[571,5335,5336,2617,5339,5341,5342,5345],{},[123,5337,5338],{},"多模态价格翻几倍",[139,5340,4986],{}," 是 ",[139,5343,5344],{},"doubao-1-5-pro"," 的 4 倍价格，按需切。",[26,5347,854],{"id":854},[568,5349,5350,5357,5365,5372],{},[571,5351,5352,5353,5356],{},"集成平台：",[861,5354,4703],{"href":5355},"\u002Fagent\u002Fplatform\u002Fcoze.html","（字节自家 Agent 平台）",[571,5358,4483,5359,865,5361,865,5363],{},[861,5360,2504],{"href":4486},[861,5362,4347],{"href":4489},[861,5364,680],{"href":3653},[571,5366,5367,5368],{},"多模态：",[861,5369,5371],{"href":5370},"\u002Fwiki\u002Fembedding.html","Embedding",[571,5373,2691,5374],{},[861,5375,2694],{"href":1776},[885,5377,5378],{},"html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":137,"searchDepth":174,"depth":174,"links":5380},[5381,5382,5388,5391,5392,5393,5394,5395,5396],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":5383},[5384,5385,5386,5387],{"id":4578,"depth":174,"text":4579},{"id":4649,"depth":174,"text":4649},{"id":4732,"depth":174,"text":4732},{"id":2568,"depth":174,"text":2568},{"id":129,"depth":162,"text":130,"children":5389},[5390],{"id":4923,"depth":174,"text":4923},{"id":492,"depth":162,"text":492},{"id":5061,"depth":162,"text":5061},{"id":5147,"depth":162,"text":5148},{"id":5163,"depth":162,"text":5164},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},256000,"字节跳动豆包（Doubao）1.5 Pro 旗舰模型，256K 超长上下文 + 多模态原生支持，火山引擎稳定服务无境外延迟，输入 ¥0.8\u002FM 极低价，深度集成飞书 \u002F 抖音 \u002F 扣子（Coze）生态，国内合规场景首选。",{},"\u002Fmodels\u002Fdoubao-1-5-pro","Input ¥0.8\u002FM · Output ¥2\u002FM（火山引擎）",[2727,5403],"agent\u002Fplatform\u002Fcoze","2025-06-20",{"title":4552,"description":5398},"models\u002Fdoubao-1-5-pro",[5408,5409,5410,5411,5412],"256K 上下文，国产模型中较长","价格极低，与 Qwen 3 \u002F DeepSeek 持平","字节生态集成（飞书\u002F抖音\u002F剪映）","国内直连，火山引擎稳定","多模态支持（图片理解）",[5414,5415,5082,5416],"企业客服 \u002F 知识问答","飞书\u002F抖音生态应用开发","多模态图片理解","字节跳动","ByteDance",[5420,5421,5422,5423],"编程能力弱于 Claude\u002FGLM","英文能力一般","开源程度低（仅 API）","社区生态不如 Llama\u002FQwen","tKyMAP6G7Rt_g-EY6vJCl_rmARQBqlqIhiBzpULkULY",{"id":5426,"title":665,"apiCompatible":5427,"benchmarks":5429,"body":5436,"category":6180,"contextWindow":6181,"description":6182,"extension":910,"maxOutput":6183,"meta":6184,"navigation":177,"path":6185,"pricing":6186,"published":915,"relatedTools":6187,"releaseDate":6188,"seo":6189,"slug":6190,"stem":6191,"strengths":6192,"updated":915,"useCases":6198,"vendor":6203,"vendorEn":6203,"weaknesses":6204,"__hash__":6209},"models\u002Fmodels\u002Fgemini-2.5-flash.md",[5428],"google",[5430,5432,5434],{"name":17,"score":5431},"82.4%",{"name":20,"score":5433},"78.5%",{"name":952,"score":5435},"42.8%",{"type":23,"value":5437,"toc":6161},[5438,5440,5447,5449,5452,5522,5525,5529,5538,5540,5543,5545,5548,5550,5748,5757,5761,5764,5831,5834,5837,5840,5860,5863,5877,5880,5955,5958,5962,5968,6026,6033,6037,6039,6056,6058,6072,6076,6079,6085,6093,6095,6133,6135,6159],[26,5439,28],{"id":28},[30,5441,5442,5443,5446],{},"Gemini 2.5 Flash 是 ",[861,5444,1561],{"href":5445},"\u002Fmodels\u002Fgemini-2.5-pro.html"," 的轻量版，定位为\"极致性价比\"。Input $0.075\u002FM token——是 Claude Sonnet 4 的 1\u002F40，GPT-4o 的 1\u002F33。保留 100 万 token 上下文和多模态能力。",[26,5448,35],{"id":35},[37,5450,5451],{"id":5451},"全网最低价",[47,5453,5454,5465],{},[50,5455,5456],{},[53,5457,5458,5460,5462],{},[56,5459,619],{},[56,5461,2584],{},[56,5463,5464],{},"对比 Flash",[66,5466,5467,5476,5485,5494,5504,5513],{},[53,5468,5469,5471,5473],{},[71,5470,665],{},[71,5472,668],{},[71,5474,5475],{},"1×",[53,5477,5478,5480,5482],{},[71,5479,1561],{},[71,5481,2589],{},[71,5483,5484],{},"17×",[53,5486,5487,5489,5491],{},[71,5488,649],{},[71,5490,652],{},[71,5492,5493],{},"2×",[53,5495,5496,5498,5501],{},[71,5497,2845],{},[71,5499,5500],{},"$2.5\u002FM",[71,5502,5503],{},"33×",[53,5505,5506,5508,5510],{},[71,5507,9],{},[71,5509,518],{},[71,5511,5512],{},"13×",[53,5514,5515,5517,5519],{},[71,5516,864],{},[71,5518,521],{},[71,5520,5521],{},"40×",[30,5523,5524],{},"100 亿 token 的 Input 费用：Flash $750，Sonnet 4 $30,000。",[37,5526,5528],{"id":5527},"_100-万-token-上下文","100 万 token 上下文",[30,5530,5531,5532,5535,5536,126],{},"与 Pro 版共享 100 万 token 上下文窗口。可以用极低成本处理超长文档、整个代码仓库。但",[123,5533,5534],{},"实测同样有\"中间遗忘\"问题","，超过 200K 后质量明显下降，详见 ",[861,5537,883],{"href":882},[37,5539,2568],{"id":2568},[30,5541,5542],{},"支持图片、视频、音频输入。视频理解能力继承自 Pro 版，质量略低但速度更快——适合海量视频内容审核 \u002F 打标场景。",[37,5544,629],{"id":629},[30,5546,5547],{},"首 token 延迟 ~0.3s，流式吞吐 ~120 tok\u002Fs，是同档最快的之一。对实时聊天 \u002F 流式输出体验非常重要。",[26,5549,130],{"id":129},[132,5551,5553],{"className":134,"code":5552,"language":136,"meta":137,"style":137},"from google import genai\nclient = genai.Client(api_key=\"AIza...\")\n\n# 关闭 thinking 加速（Flash 默认也开了 thinking，但对简单任务无必要）\nresp = client.models.generate_content(\n    model=\"gemini-2.5-flash\",\n    contents=\"把这段文本分类：\" + text,\n    config={\n        \"temperature\": 0,\n        \"max_output_tokens\": 100,\n        \"thinking_config\": {\"thinking_budget\": 0},   # 关掉省钱省时间\n        \"response_mime_type\": \"application\u002Fjson\",\n        \"response_schema\": {\n            \"type\": \"object\",\n            \"properties\": {\"category\": {\"type\": \"string\"}},\n        },\n    },\n)\n",[139,5554,5555,5567,5585,5589,5594,5603,5614,5630,5640,5651,5662,5682,5694,5701,5712,5734,5739,5744],{"__ignoreMap":137},[142,5556,5557,5559,5562,5564],{"class":144,"line":145},[142,5558,149],{"class":148},[142,5560,5561],{"class":152}," google ",[142,5563,156],{"class":148},[142,5565,5566],{"class":152}," genai\n",[142,5568,5569,5571,5573,5576,5578,5580,5583],{"class":144,"line":162},[142,5570,165],{"class":152},[142,5572,168],{"class":148},[142,5574,5575],{"class":152}," genai.Client(",[142,5577,1836],{"class":200},[142,5579,168],{"class":148},[142,5581,5582],{"class":206},"\"AIza...\"",[142,5584,480],{"class":152},[142,5586,5587],{"class":144,"line":174},[142,5588,178],{"emptyLinePlaceholder":177},[142,5590,5591],{"class":144,"line":181},[142,5592,5593],{"class":184},"# 关闭 thinking 加速（Flash 默认也开了 thinking，但对简单任务无必要）\n",[142,5595,5596,5598,5600],{"class":144,"line":188},[142,5597,1007],{"class":152},[142,5599,168],{"class":148},[142,5601,5602],{"class":152}," client.models.generate_content(\n",[142,5604,5605,5607,5609,5612],{"class":144,"line":197},[142,5606,201],{"class":200},[142,5608,168],{"class":148},[142,5610,5611],{"class":206},"\"gemini-2.5-flash\"",[142,5613,210],{"class":152},[142,5615,5616,5619,5621,5624,5627],{"class":144,"line":5},[142,5617,5618],{"class":200},"    contents",[142,5620,168],{"class":148},[142,5622,5623],{"class":206},"\"把这段文本分类：\"",[142,5625,5626],{"class":148}," +",[142,5628,5629],{"class":152}," text,\n",[142,5631,5632,5635,5637],{"class":144,"line":230},[142,5633,5634],{"class":200},"    config",[142,5636,168],{"class":148},[142,5638,5639],{"class":152},"{\n",[142,5641,5642,5645,5647,5649],{"class":144,"line":243},[142,5643,5644],{"class":206},"        \"temperature\"",[142,5646,257],{"class":152},[142,5648,238],{"class":220},[142,5650,210],{"class":152},[142,5652,5653,5656,5658,5660],{"class":144,"line":272},[142,5654,5655],{"class":206},"        \"max_output_tokens\"",[142,5657,257],{"class":152},[142,5659,403],{"class":220},[142,5661,210],{"class":152},[142,5663,5664,5667,5669,5672,5674,5676,5679],{"class":144,"line":284},[142,5665,5666],{"class":206},"        \"thinking_config\"",[142,5668,2052],{"class":152},[142,5670,5671],{"class":206},"\"thinking_budget\"",[142,5673,257],{"class":152},[142,5675,238],{"class":220},[142,5677,5678],{"class":152},"},   ",[142,5680,5681],{"class":184},"# 关掉省钱省时间\n",[142,5683,5684,5687,5689,5692],{"class":144,"line":299},[142,5685,5686],{"class":206},"        \"response_mime_type\"",[142,5688,257],{"class":152},[142,5690,5691],{"class":206},"\"application\u002Fjson\"",[142,5693,210],{"class":152},[142,5695,5696,5699],{"class":144,"line":471},[142,5697,5698],{"class":206},"        \"response_schema\"",[142,5700,382],{"class":152},[142,5702,5703,5705,5707,5710],{"class":144,"line":477},[142,5704,2025],{"class":206},[142,5706,257],{"class":152},[142,5708,5709],{"class":206},"\"object\"",[142,5711,210],{"class":152},[142,5713,5714,5717,5719,5722,5724,5726,5728,5731],{"class":144,"line":483},[142,5715,5716],{"class":206},"            \"properties\"",[142,5718,2052],{"class":152},[142,5720,5721],{"class":206},"\"category\"",[142,5723,2052],{"class":152},[142,5725,1046],{"class":206},[142,5727,257],{"class":152},[142,5729,5730],{"class":206},"\"string\"",[142,5732,5733],{"class":152},"}},\n",[142,5735,5736],{"class":144,"line":3185},[142,5737,5738],{"class":152},"        },\n",[142,5740,5741],{"class":144,"line":3190},[142,5742,5743],{"class":152},"    },\n",[142,5745,5746],{"class":144,"line":3206},[142,5747,480],{"class":152},[30,5749,5750,5756],{},[123,5751,5752,5755],{},[139,5753,5754],{},"thinking_budget=0"," 是 Flash 的重要省钱开关","——批量分类、抽取这种\"无脑活\"完全不需要推理，关掉后速度 +30%、token -20%。",[37,5758,5760],{"id":5759},"batch-api","Batch API",[30,5762,5763],{},"Gemini Batch API 提供 -50% 折扣（Flash Input 直接砍到 $0.0375\u002FM），24 小时内出结果：",[132,5765,5767],{"className":134,"code":5766,"language":136,"meta":137,"style":137},"batch = client.batches.create(\n    model=\"gemini-2.5-flash\",\n    requests=[\n        {\"contents\": \"分类：\" + t} for t in texts\n    ],\n)\n",[139,5768,5769,5778,5788,5796,5823,5827],{"__ignoreMap":137},[142,5770,5771,5773,5775],{"class":144,"line":145},[142,5772,327],{"class":152},[142,5774,168],{"class":148},[142,5776,5777],{"class":152}," client.batches.create(\n",[142,5779,5780,5782,5784,5786],{"class":144,"line":162},[142,5781,201],{"class":200},[142,5783,168],{"class":148},[142,5785,5611],{"class":206},[142,5787,210],{"class":152},[142,5789,5790,5792,5794],{"class":144,"line":174},[142,5791,337],{"class":200},[142,5793,168],{"class":148},[142,5795,342],{"class":152},[142,5797,5798,5800,5803,5805,5808,5810,5813,5815,5818,5820],{"class":144,"line":181},[142,5799,1292],{"class":152},[142,5801,5802],{"class":206},"\"contents\"",[142,5804,257],{"class":152},[142,5806,5807],{"class":206},"\"分类：\"",[142,5809,5626],{"class":148},[142,5811,5812],{"class":152}," t} ",[142,5814,1103],{"class":148},[142,5816,5817],{"class":152}," t ",[142,5819,293],{"class":148},[142,5821,5822],{"class":152}," texts\n",[142,5824,5825],{"class":144,"line":188},[142,5826,1321],{"class":152},[142,5828,5829],{"class":144,"line":197},[142,5830,480],{"class":152},[30,5832,5833],{},"万级离线任务的成本基本可以忽略不计。",[26,5835,5836],{"id":5836},"实际表现",[30,5838,5839],{},"Flash 保留约 Pro 85% 的能力。在以下场景表现优秀：",[568,5841,5842,5845,5848,5851,5854,5857],{},[571,5843,5844],{},"文本分类、情感分析",[571,5846,5847],{},"文档摘要、信息抽取",[571,5849,5850],{},"简单问答、FAQ",[571,5852,5853],{},"代码补全（简单场景）",[571,5855,5856],{},"视频内容打标 \u002F 审核",[571,5858,5859],{},"多语言翻译",[30,5861,5862],{},"以下场景建议用 Pro：",[568,5864,5865,5868,5871,5874],{},[571,5866,5867],{},"复杂推理 \u002F 数学",[571,5869,5870],{},"精确编程 \u002F 多文件改写",[571,5872,5873],{},"需要深度分析的长文档",[571,5875,5876],{},"法律 \u002F 医疗等严格场景",[26,5878,5879],{"id":5879},"典型用量成本",[47,5881,5882,5897],{},[50,5883,5884],{},[53,5885,5886,5888,5891,5894],{},[56,5887,58],{},[56,5889,5890],{},"日用量",[56,5892,5893],{},"Flash 月成本",[56,5895,5896],{},"Sonnet 4 月成本",[66,5898,5899,5913,5927,5941],{},[53,5900,5901,5904,5907,5910],{},[71,5902,5903],{},"客服 Bot（1 万次\u002F天）",[71,5905,5906],{},"~500M Input + 100M Output",[71,5908,5909],{},"~$2.3",[71,5911,5912],{},"~$95",[53,5914,5915,5918,5921,5924],{},[71,5916,5917],{},"文档摘要（1 万篇\u002F天）",[71,5919,5920],{},"~2B Input + 200M Output",[71,5922,5923],{},"~$6.3",[71,5925,5926],{},"~$270",[53,5928,5929,5932,5935,5938],{},[71,5930,5931],{},"实时翻译",[71,5933,5934],{},"~10B Input + 10B Output",[71,5936,5937],{},"~$112",[71,5939,5940],{},"~$5,400",[53,5942,5943,5946,5949,5952],{},[71,5944,5945],{},"内容审核（10 万条\u002F天）",[71,5947,5948],{},"~3B Input + 30M Output",[71,5950,5951],{},"~$7.1",[71,5953,5954],{},"~$285",[30,5956,5957],{},"用 Claude Sonnet 4 做同样的事，成本是 40 倍——很多生意做不起来。",[26,5959,5961],{"id":5960},"flash-vs-flash-lite","Flash vs Flash-Lite",[30,5963,5964,5965,2617],{},"Google 还有更轻的 ",[139,5966,5967],{},"gemini-2.5-flash-lite",[47,5969,5970,5982],{},[50,5971,5972],{},[53,5973,5974,5976,5979],{},[56,5975,1451],{},[56,5977,5978],{},"Flash",[56,5980,5981],{},"Flash-Lite",[66,5983,5984,5993,6001,6009,6018],{},[53,5985,5986,5988,5990],{},[71,5987,515],{},[71,5989,668],{},[71,5991,5992],{},"$0.0375\u002FM",[53,5994,5995,5997,5999],{},[71,5996,529],{},[71,5998,548],{},[71,6000,652],{},[53,6002,6003,6005,6007],{},[71,6004,2526],{},[71,6006,2534],{},[71,6008,2534],{},[53,6010,6011,6013,6015],{},[71,6012,20],{},[71,6014,5433],{},[71,6016,6017],{},"~73%",[53,6019,6020,6022,6024],{},[71,6021,629],{},[71,6023,644],{},[71,6025,644],{},[30,6027,6028,6029,6032],{},"Flash-Lite 适合",[123,6030,6031],{},"极端高吞吐","场景（百亿级 token \u002F 月）。但能力差距明显，能用 Flash 就别用 Lite。",[26,6034,6036],{"id":6035},"适合-不适合","适合 \u002F 不适合",[30,6038,788],{},[568,6040,6041,6044,6047,6050,6053],{},[571,6042,6043],{},"内容分类 \u002F 情感分析 \u002F 关键词抽取",[571,6045,6046],{},"客服 Bot 第一线（复杂问题升级到 Pro）",[571,6048,6049],{},"海量文档摘要 \u002F 知识库构建",[571,6051,6052],{},"视频审核 \u002F 直播切片打标",[571,6054,6055],{},"离线 batch 数据清洗",[30,6057,808],{},[568,6059,6060,6063,6066,6069],{},[571,6061,6062],{},"AI 编程主力（SWE-bench 表现差）",[571,6064,6065],{},"复杂 Agent 多步推理（工具调用稳定性不够）",[571,6067,6068],{},"法律 \u002F 医疗严格场景（拒答率低，幻觉风险）",[571,6070,6071],{},"长文创意写作（输出质量明显不如 Sonnet\u002FOpus）",[26,6073,6075],{"id":6074},"实战分层路由模式","实战：分层路由模式",[30,6077,6078],{},"很多生产系统用 Flash + Pro\u002FSonnet 双模型分层：",[132,6080,6083],{"className":6081,"code":6082,"language":435},[1538],"用户请求\n  │\n  ├─ [Flash 路由器] 快速判断任务复杂度\n  │     ├─ 简单（80%） → Flash 直接回答\n  │     └─ 复杂（20%） → 转 Pro \u002F Claude Sonnet 4\n  │\n  └─ 综合：均价低、用户体验不打折\n",[139,6084,6082],{"__ignoreMap":137},[30,6086,6087,6088,6092],{},"实际可能：80% 流量走 Flash（",[6089,6090,6091],"del",{},"$0.1\u002F万条），20% 走 Sonnet 4（","$10\u002F万条），均价 $2\u002F万条——比纯 Sonnet 4 便宜 5 倍。",[26,6094,825],{"id":825},[568,6096,6097,6106,6115,6121,6127],{},[571,6098,6099,6105],{},[123,6100,6101,6104],{},[139,6102,6103],{},"thinking_budget"," 默认非 0","：Flash 也会\"想一会儿\"，简单任务必须显式关掉。",[571,6107,6108,2617,6111,6114],{},[123,6109,6110],{},"JSON 输出务必带 schema",[139,6112,6113],{},"response_mime_type=application\u002Fjson"," 单独用不够强约束。",[571,6116,6117,6120],{},[123,6118,6119],{},"多语言慎重","：英文中文表现 OK，小语种（日韩阿拉伯等）质量明显不如 Pro。",[571,6122,6123,6126],{},[123,6124,6125],{},"视频不要超过 1 小时","：尽管 1M 上下文支持，但长视频\"中间遗忘\"严重，建议切片处理。",[571,6128,6129,6132],{},[123,6130,6131],{},"国内访问","：和 Pro 一样需中转，OpenRouter 或自建 Cloudflare Workers。",[26,6134,854],{"id":854},[568,6136,6137,6142,6150,6154],{},[571,6138,6139,6140],{},"上位旗舰：",[861,6141,1561],{"href":5445},[571,6143,6144,6145,865,6148],{},"同档对比：",[861,6146,9],{"href":6147},"\u002Fmodels\u002Fclaude-haiku-4.html",[861,6149,680],{"href":3653},[571,6151,4497,6152],{},[861,6153,876],{"href":875},[571,6155,6156,6157],{},"分层架构：",[861,6158,2694],{"href":1776},[885,6160,5378],{},{"title":137,"searchDepth":174,"depth":174,"links":6162},[6163,6164,6170,6173,6174,6175,6176,6177,6178,6179],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":6165},[6166,6167,6168,6169],{"id":5451,"depth":174,"text":5451},{"id":5527,"depth":174,"text":5528},{"id":2568,"depth":174,"text":2568},{"id":629,"depth":174,"text":629},{"id":129,"depth":162,"text":130,"children":6171},[6172],{"id":5759,"depth":174,"text":5760},{"id":5836,"depth":162,"text":5836},{"id":5879,"depth":162,"text":5879},{"id":5960,"depth":162,"text":5961},{"id":6035,"depth":162,"text":6036},{"id":6074,"depth":162,"text":6075},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"multimodal",1000000,"Google Gemini 2.5 Flash 极致性价比模型，输入 $0.075\u002FM 全网最低之一，100 万 token 超长上下文 + 原生多模态（图像 \u002F 音频 \u002F 视频），适合高吞吐 RAG、批量文档分析与成本敏感型生产场景。",65536,{},"\u002Fmodels\u002Fgemini-2.5-flash","Input $0.075\u002FM · Output $0.30\u002FM · 闪存 $0.01875\u002FM",[3699],"2025-06-17",{"title":665,"description":6182},"gemini-2.5-flash","models\u002Fgemini-2.5-flash",[6193,6194,6195,6196,6197],"全网最便宜的模型，Input 仅 $0.075\u002FM","100 万 token 上下文，与 Pro 版一致","速度极快，适合实时场景","多模态支持（图片\u002F视频\u002F音频）","65K 输出窗口",[6199,6200,6201,6202],"高吞吐批量处理（分类\u002F摘要\u002F抽取）","实时聊天机器人","长文档快速摘要","视频内容理解","Google",[6205,6206,6207,6208],"推理能力弱于 Pro 版","编程能力一般，不如 Claude","国内无法直连","长上下文下质量下降明显","jyUQukAObYi7bxN4nqQbhfal2GelrEdiwVs2LNjEsGE",{"id":6211,"title":1561,"apiCompatible":6212,"benchmarks":6213,"body":6219,"category":6180,"contextWindow":6181,"description":7217,"extension":910,"maxOutput":6183,"meta":7218,"navigation":177,"path":7219,"pricing":7220,"published":915,"relatedTools":7221,"releaseDate":7222,"seo":7223,"slug":7224,"stem":7225,"strengths":7226,"updated":915,"useCases":7232,"vendor":6203,"vendorEn":6203,"weaknesses":7237,"__hash__":7242},"models\u002Fmodels\u002Fgemini-2.5-pro.md",[5428],[6214,6215,6217,6218],{"name":14,"score":2518},{"name":17,"score":6216},"92.1%",{"name":20,"score":18},{"name":952,"score":1575},{"type":23,"value":6220,"toc":7197},[6221,6223,6226,6228,6230,6233,6247,6257,6259,6262,6327,6331,6338,6440,6445,6449,6455,6466,6469,6471,6475,6599,6603,6755,6762,6764,6844,6846,6913,6926,6929,6932,6986,6989,6992,6995,7009,7012,7102,7106,7117,7119,7170,7172,7194],[26,6222,28],{"id":28},[30,6224,6225],{},"Gemini 2.5 Pro 是 Google 于 2025 年 3 月发布的旗舰模型，最大亮点是 100 万 token 的上下文窗口——全网最长。可以一次性处理整本书、整个代码仓库或数小时的视频。",[26,6227,35],{"id":35},[37,6229,5528],{"id":5527},[30,6231,6232],{},"这是 Gemini 2.5 Pro 的杀手锏。100 万 token 约等于：",[568,6234,6235,6238,6241,6244],{},[571,6236,6237],{},"一本 75 万字的中文小说",[571,6239,6240],{},"一个 10 万行代码的中型项目",[571,6242,6243],{},"10 小时 1080p 视频",[571,6245,6246],{},"一次完整的学术会议所有论文",[30,6248,6249,6250,6252,6253,6256],{},"但要注意\"中间遗忘\"——超过 500K 后召回率明显下降，详见 ",[861,6251,883],{"href":882},"。Google 自己的 Needle-in-Haystack 测试虽然全 100% 命中，但",[123,6254,6255],{},"真实业务中的多 hop 推理","在长上下文下仍然不稳。",[37,6258,2568],{"id":2568},[30,6260,6261],{},"原生支持图片、视频、音频输入。视频理解能力是所有大模型中最强的——可以精确识别视频中的动作、物体、场景、对话内容。",[132,6263,6265],{"className":134,"code":6264,"language":136,"meta":137,"style":137},"# 视频输入\ncontent_part = {\n    \"file_data\": {\n        \"mime_type\": \"video\u002Fmp4\",\n        \"file_uri\": \"gs:\u002F\u002Fbucket\u002Flecture.mp4\",\n    }\n}\n# 1 小时视频约消耗 100K-200K token\n",[139,6266,6267,6272,6282,6289,6301,6313,6318,6322],{"__ignoreMap":137},[142,6268,6269],{"class":144,"line":145},[142,6270,6271],{"class":184},"# 视频输入\n",[142,6273,6274,6277,6279],{"class":144,"line":162},[142,6275,6276],{"class":152},"content_part ",[142,6278,168],{"class":148},[142,6280,6281],{"class":152}," {\n",[142,6283,6284,6287],{"class":144,"line":174},[142,6285,6286],{"class":206},"    \"file_data\"",[142,6288,382],{"class":152},[142,6290,6291,6294,6296,6299],{"class":144,"line":181},[142,6292,6293],{"class":206},"        \"mime_type\"",[142,6295,257],{"class":152},[142,6297,6298],{"class":206},"\"video\u002Fmp4\"",[142,6300,210],{"class":152},[142,6302,6303,6306,6308,6311],{"class":144,"line":188},[142,6304,6305],{"class":206},"        \"file_uri\"",[142,6307,257],{"class":152},[142,6309,6310],{"class":206},"\"gs:\u002F\u002Fbucket\u002Flecture.mp4\"",[142,6312,210],{"class":152},[142,6314,6315],{"class":144,"line":197},[142,6316,6317],{"class":152},"    }\n",[142,6319,6320],{"class":144,"line":5},[142,6321,1316],{"class":152},[142,6323,6324],{"class":144,"line":230},[142,6325,6326],{"class":184},"# 1 小时视频约消耗 100K-200K token\n",[37,6328,6330],{"id":6329},"thinking-模式自适应推理","Thinking 模式（自适应推理）",[30,6332,6333,6334,6337],{},"Gemini 2.5 系列内置 ",[123,6335,6336],{},"dynamic thinking","——模型自动决定是否要\"想一想\"。也可以手动控制：",[132,6339,6341],{"className":134,"code":6340,"language":136,"meta":137,"style":137},"from google import genai\n\nclient = genai.Client(api_key=\"...\")\n\nresp = client.models.generate_content(\n    model=\"gemini-2.5-pro\",\n    contents=\"证明费马小定理\",\n    config={\n        \"thinking_config\": {\"thinking_budget\": 8000},  # 0 = 关闭\n    },\n)\n",[139,6342,6343,6353,6357,6373,6377,6385,6396,6407,6415,6432,6436],{"__ignoreMap":137},[142,6344,6345,6347,6349,6351],{"class":144,"line":145},[142,6346,149],{"class":148},[142,6348,5561],{"class":152},[142,6350,156],{"class":148},[142,6352,5566],{"class":152},[142,6354,6355],{"class":144,"line":162},[142,6356,178],{"emptyLinePlaceholder":177},[142,6358,6359,6361,6363,6365,6367,6369,6371],{"class":144,"line":174},[142,6360,165],{"class":152},[142,6362,168],{"class":148},[142,6364,5575],{"class":152},[142,6366,1836],{"class":200},[142,6368,168],{"class":148},[142,6370,4815],{"class":206},[142,6372,480],{"class":152},[142,6374,6375],{"class":144,"line":181},[142,6376,178],{"emptyLinePlaceholder":177},[142,6378,6379,6381,6383],{"class":144,"line":188},[142,6380,1007],{"class":152},[142,6382,168],{"class":148},[142,6384,5602],{"class":152},[142,6386,6387,6389,6391,6394],{"class":144,"line":197},[142,6388,201],{"class":200},[142,6390,168],{"class":148},[142,6392,6393],{"class":206},"\"gemini-2.5-pro\"",[142,6395,210],{"class":152},[142,6397,6398,6400,6402,6405],{"class":144,"line":5},[142,6399,5618],{"class":200},[142,6401,168],{"class":148},[142,6403,6404],{"class":206},"\"证明费马小定理\"",[142,6406,210],{"class":152},[142,6408,6409,6411,6413],{"class":144,"line":230},[142,6410,5634],{"class":200},[142,6412,168],{"class":148},[142,6414,5639],{"class":152},[142,6416,6417,6419,6421,6423,6425,6427,6429],{"class":144,"line":243},[142,6418,5666],{"class":206},[142,6420,2052],{"class":152},[142,6422,5671],{"class":206},[142,6424,257],{"class":152},[142,6426,1262],{"class":220},[142,6428,2062],{"class":152},[142,6430,6431],{"class":184},"# 0 = 关闭\n",[142,6433,6434],{"class":144,"line":272},[142,6435,5743],{"class":152},[142,6437,6438],{"class":144,"line":284},[142,6439,480],{"class":152},[30,6441,6442,6444],{},[139,6443,5754],{}," 禁用推理（变成快速模式），数值越大推理越深。",[37,6446,6448],{"id":6447},"flash-版本","Flash 版本",[30,6450,6451,6454],{},[861,6452,665],{"href":6453},"\u002Fmodels\u002Fgemini-2.5-flash.html"," 是 Pro 的轻量版：",[568,6456,6457,6460,6463],{},[571,6458,6459],{},"速度：Pro 的 5-10 倍",[571,6461,6462],{},"价格：Input $0.075\u002FM（Pro 的 6%）",[571,6464,6465],{},"能力：保留 Pro 约 85% 的能力",[30,6467,6468],{},"对于高吞吐场景（客服 bot、批量分类、内容审核），Flash 的性价比无敌。",[26,6470,130],{"id":129},[37,6472,6474],{"id":6473},"python-sdk新版-google-genai","Python SDK（新版 google-genai）",[132,6476,6478],{"className":134,"code":6477,"language":136,"meta":137,"style":137},"from google import genai\n\nclient = genai.Client(api_key=\"AIza...\")\n\nresp = client.models.generate_content(\n    model=\"gemini-2.5-pro\",\n    contents=\"Hello, summarize this PDF.\",\n    config={\n        \"temperature\": 1.0,\n        \"max_output_tokens\": 8000,\n        \"response_mime_type\": \"application\u002Fjson\",   # 强制 JSON 输出\n    },\n)\nprint(resp.text)\n",[139,6479,6480,6490,6494,6510,6514,6522,6532,6543,6551,6561,6571,6584,6588,6592],{"__ignoreMap":137},[142,6481,6482,6484,6486,6488],{"class":144,"line":145},[142,6483,149],{"class":148},[142,6485,5561],{"class":152},[142,6487,156],{"class":148},[142,6489,5566],{"class":152},[142,6491,6492],{"class":144,"line":162},[142,6493,178],{"emptyLinePlaceholder":177},[142,6495,6496,6498,6500,6502,6504,6506,6508],{"class":144,"line":174},[142,6497,165],{"class":152},[142,6499,168],{"class":148},[142,6501,5575],{"class":152},[142,6503,1836],{"class":200},[142,6505,168],{"class":148},[142,6507,5582],{"class":206},[142,6509,480],{"class":152},[142,6511,6512],{"class":144,"line":181},[142,6513,178],{"emptyLinePlaceholder":177},[142,6515,6516,6518,6520],{"class":144,"line":188},[142,6517,1007],{"class":152},[142,6519,168],{"class":148},[142,6521,5602],{"class":152},[142,6523,6524,6526,6528,6530],{"class":144,"line":197},[142,6525,201],{"class":200},[142,6527,168],{"class":148},[142,6529,6393],{"class":206},[142,6531,210],{"class":152},[142,6533,6534,6536,6538,6541],{"class":144,"line":5},[142,6535,5618],{"class":200},[142,6537,168],{"class":148},[142,6539,6540],{"class":206},"\"Hello, summarize this PDF.\"",[142,6542,210],{"class":152},[142,6544,6545,6547,6549],{"class":144,"line":230},[142,6546,5634],{"class":200},[142,6548,168],{"class":148},[142,6550,5639],{"class":152},[142,6552,6553,6555,6557,6559],{"class":144,"line":243},[142,6554,5644],{"class":206},[142,6556,257],{"class":152},[142,6558,1273],{"class":220},[142,6560,210],{"class":152},[142,6562,6563,6565,6567,6569],{"class":144,"line":272},[142,6564,5655],{"class":206},[142,6566,257],{"class":152},[142,6568,1262],{"class":220},[142,6570,210],{"class":152},[142,6572,6573,6575,6577,6579,6581],{"class":144,"line":284},[142,6574,5686],{"class":206},[142,6576,257],{"class":152},[142,6578,5691],{"class":206},[142,6580,2886],{"class":152},[142,6582,6583],{"class":184},"# 强制 JSON 输出\n",[142,6585,6586],{"class":144,"line":299},[142,6587,5743],{"class":152},[142,6589,6590],{"class":144,"line":471},[142,6591,480],{"class":152},[142,6593,6594,6596],{"class":144,"line":477},[142,6595,1330],{"class":220},[142,6597,6598],{"class":152},"(resp.text)\n",[37,6600,6602],{"id":6601},"context-caching显式-api与-openai-自动-cache-不同","Context Caching（显式 API，与 OpenAI 自动 cache 不同）",[132,6604,6606],{"className":134,"code":6605,"language":136,"meta":137,"style":137},"# 1. 创建 cache\ncache = client.caches.create(\n    model=\"gemini-2.5-pro\",\n    config={\n        \"contents\": [{\"role\": \"user\", \"parts\": [{\"text\": LONG_DOCUMENT}]}],\n        \"ttl\": \"3600s\",   # 1 小时\n    },\n)\n\n# 2. 用 cache 名引用\nresp = client.models.generate_content(\n    model=\"gemini-2.5-pro\",\n    contents=\"What are the key findings?\",\n    config={\"cached_content\": cache.name},\n)\n# Input 价格 -75%，再加按小时存储费\n",[139,6607,6608,6613,6623,6633,6641,6671,6686,6690,6694,6698,6703,6711,6721,6732,6746,6750],{"__ignoreMap":137},[142,6609,6610],{"class":144,"line":145},[142,6611,6612],{"class":184},"# 1. 创建 cache\n",[142,6614,6615,6618,6620],{"class":144,"line":162},[142,6616,6617],{"class":152},"cache ",[142,6619,168],{"class":148},[142,6621,6622],{"class":152}," client.caches.create(\n",[142,6624,6625,6627,6629,6631],{"class":144,"line":174},[142,6626,201],{"class":200},[142,6628,168],{"class":148},[142,6630,6393],{"class":206},[142,6632,210],{"class":152},[142,6634,6635,6637,6639],{"class":144,"line":181},[142,6636,5634],{"class":200},[142,6638,168],{"class":148},[142,6640,5639],{"class":152},[142,6642,6643,6646,6648,6650,6652,6654,6656,6659,6661,6663,6665,6668],{"class":144,"line":188},[142,6644,6645],{"class":206},"        \"contents\"",[142,6647,413],{"class":152},[142,6649,254],{"class":206},[142,6651,257],{"class":152},[142,6653,260],{"class":206},[142,6655,263],{"class":152},[142,6657,6658],{"class":206},"\"parts\"",[142,6660,413],{"class":152},[142,6662,2030],{"class":206},[142,6664,257],{"class":152},[142,6666,6667],{"class":220},"LONG_DOCUMENT",[142,6669,6670],{"class":152},"}]}],\n",[142,6672,6673,6676,6678,6681,6683],{"class":144,"line":197},[142,6674,6675],{"class":206},"        \"ttl\"",[142,6677,257],{"class":152},[142,6679,6680],{"class":206},"\"3600s\"",[142,6682,2886],{"class":152},[142,6684,6685],{"class":184},"# 1 小时\n",[142,6687,6688],{"class":144,"line":5},[142,6689,5743],{"class":152},[142,6691,6692],{"class":144,"line":230},[142,6693,480],{"class":152},[142,6695,6696],{"class":144,"line":243},[142,6697,178],{"emptyLinePlaceholder":177},[142,6699,6700],{"class":144,"line":272},[142,6701,6702],{"class":184},"# 2. 用 cache 名引用\n",[142,6704,6705,6707,6709],{"class":144,"line":284},[142,6706,1007],{"class":152},[142,6708,168],{"class":148},[142,6710,5602],{"class":152},[142,6712,6713,6715,6717,6719],{"class":144,"line":299},[142,6714,201],{"class":200},[142,6716,168],{"class":148},[142,6718,6393],{"class":206},[142,6720,210],{"class":152},[142,6722,6723,6725,6727,6730],{"class":144,"line":471},[142,6724,5618],{"class":200},[142,6726,168],{"class":148},[142,6728,6729],{"class":206},"\"What are the key findings?\"",[142,6731,210],{"class":152},[142,6733,6734,6736,6738,6740,6743],{"class":144,"line":477},[142,6735,5634],{"class":200},[142,6737,168],{"class":148},[142,6739,363],{"class":152},[142,6741,6742],{"class":206},"\"cached_content\"",[142,6744,6745],{"class":152},": cache.name},\n",[142,6747,6748],{"class":144,"line":483},[142,6749,480],{"class":152},[142,6751,6752],{"class":144,"line":3185},[142,6753,6754],{"class":184},"# Input 价格 -75%，再加按小时存储费\n",[30,6756,6757,6758,6761],{},"显式 cache 适合",[123,6759,6760],{},"少量超长文档反复问","的场景（合同、财报）。注意 cache 不是免费——按存储时长计费 $4.50\u002FM-token\u002Fhour。",[26,6763,2222],{"id":2222},[47,6765,6766,6776],{},[50,6767,6768],{},[53,6769,6770,6772,6774],{},[56,6771,2231],{},[56,6773,713],{},[56,6775,4109],{},[66,6777,6778,6790,6802,6814,6826],{},[53,6779,6780,6784,6787],{},[71,6781,6782],{},[139,6783,1344],{},[71,6785,6786],{},"1.0（默认）",[71,6788,6789],{},"推理模型，不要改",[53,6791,6792,6796,6799],{},[71,6793,6794],{},[139,6795,4140],{},[71,6797,6798],{},"0.95（默认）",[71,6800,6801],{},"Gemini 默认就开了 nucleus sampling",[53,6803,6804,6809,6811],{},[71,6805,6806],{},[139,6807,6808],{},"max_output_tokens",[71,6810,4155],{},[71,6812,6813],{},"否则默认 8192，长输出会截断",[53,6815,6816,6820,6823],{},[71,6817,6818],{},[139,6819,6103],{},[71,6821,6822],{},"0 \u002F 8000 \u002F 32000",[71,6824,6825],{},"简单任务 0，复杂推理拉满",[53,6827,6828,6833,6838],{},[71,6829,6830],{},[139,6831,6832],{},"response_mime_type",[71,6834,6835],{},[139,6836,6837],{},"application\u002Fjson",[71,6839,6840,6841],{},"强制 JSON，配合 ",[139,6842,6843],{},"response_schema",[26,6845,492],{"id":492},[47,6847,6848,6862],{},[50,6849,6850],{},[53,6851,6852,6854,6857,6860],{},[56,6853,501],{},[56,6855,6856],{},"Pro（≤200K input）",[56,6858,6859],{},"Pro（>200K input）",[56,6861,5978],{},[66,6863,6864,6875,6886,6900],{},[53,6865,6866,6868,6870,6873],{},[71,6867,515],{},[71,6869,2589],{},[71,6871,6872],{},"$2.50\u002FM",[71,6874,668],{},[53,6876,6877,6879,6882,6884],{},[71,6878,529],{},[71,6880,6881],{},"$10\u002FM",[71,6883,535],{},[71,6885,548],{},[53,6887,6888,6891,6894,6897],{},[71,6889,6890],{},"Cached Input",[71,6892,6893],{},"$0.31\u002FM",[71,6895,6896],{},"$0.625\u002FM",[71,6898,6899],{},"$0.01875\u002FM",[53,6901,6902,6905,6908,6910],{},[71,6903,6904],{},"Cache 存储",[71,6906,6907],{},"$4.50\u002FM\u002Fhour",[71,6909,5008],{},[71,6911,6912],{},"$1.00\u002FM\u002Fhour",[30,6914,6915,6917,6918,6921,6922,6925],{},[123,6916,3256],{},"：Pro 的价格在 200K input 处",[123,6919,6920],{},"翻倍","！很多人没注意到这点。如果你的 prompt 长期超过 200K，",[123,6923,6924],{},"先做检索压缩再喂给模型","比硬塞 600K 划算。",[26,6927,6928],{"id":6928},"视频理解的真实场景",[30,6930,6931],{},"Gemini Pro 在视频上的护城河目前没人能赶上：",[47,6933,6934,6944],{},[50,6935,6936],{},[53,6937,6938,6941],{},[56,6939,6940],{},"任务",[56,6942,6943],{},"价值",[66,6945,6946,6954,6962,6970,6978],{},[53,6947,6948,6951],{},[71,6949,6950],{},"安防录像分析",[71,6952,6953],{},"找特定行为发生时刻",[53,6955,6956,6959],{},[71,6957,6958],{},"在线课程切片",[71,6960,6961],{},"自动出章节标题 \u002F 摘要 \u002F 字幕",[53,6963,6964,6967],{},[71,6965,6966],{},"电商商品视频",[71,6968,6969],{},"自动出产品标签 \u002F 文案",[53,6971,6972,6975],{},[71,6973,6974],{},"会议录像",[71,6976,6977],{},"自动出会议纪要 \u002F TODO",[53,6979,6980,6983],{},[71,6981,6982],{},"故障复盘",[71,6984,6985],{},"看操作录屏定位用户卡点",[30,6987,6988],{},"调用价：1 分钟 1080p 视频约 250-500 input token，1 小时视频 ~30K token，远比想象中便宜。",[26,6990,6991],{"id":6991},"国内使用",[30,6993,6994],{},"Google API 不对中国大陆开放，需要：",[1416,6996,6997,7000,7003,7006],{},[571,6998,6999],{},"海外网络环境 + Google AI Studio API",[571,7001,7002],{},"Google Cloud Vertex AI（企业级，要海外 GCP 账号）",[571,7004,7005],{},"通过 OpenRouter \u002F 其他中转平台",[571,7007,7008],{},"AWS Bedrock 暂不支持 Gemini（只有 Anthropic \u002F Llama）",[26,7010,7011],{"id":7011},"与其他模型怎么选",[47,7013,7014,7026],{},[50,7015,7016],{},[53,7017,7018,7020,7022,7024],{},[56,7019,1451],{},[56,7021,1561],{},[56,7023,864],{},[56,7025,1558],{},[66,7027,7028,7039,7049,7061,7071,7081,7091],{},[53,7029,7030,7032,7035,7037],{},[71,7031,2526],{},[71,7033,7034],{},"1M（全网最长）",[71,7036,2529],{},[71,7038,2409],{},[53,7040,7041,7043,7045,7047],{},[71,7042,1471],{},[71,7044,692],{},[71,7046,644],{},[71,7048,692],{},[53,7050,7051,7053,7056,7059],{},[71,7052,2568],{},[71,7054,7055],{},"★★★★★（含视频）",[71,7057,7058],{},"★★★☆☆（仅图片）",[71,7060,692],{},[53,7062,7063,7065,7067,7069],{},[71,7064,1462],{},[71,7066,692],{},[71,7068,692],{},[71,7070,644],{},[53,7072,7073,7075,7077,7079],{},[71,7074,1362],{},[71,7076,1610],{},[71,7078,1494],{},[71,7080,1610],{},[53,7082,7083,7085,7087,7089],{},[71,7084,2555],{},[71,7086,692],{},[71,7088,644],{},[71,7090,644],{},[53,7092,7093,7096,7098,7100],{},[71,7094,7095],{},"国内可用",[71,7097,2601],{},[71,7099,2601],{},[71,7101,2601],{},[30,7103,7104,2617],{},[123,7105,3464],{},[568,7107,7108,7111,7114],{},[571,7109,7110],{},"需要处理超长内容或视频 → Gemini 2.5 Pro",[571,7112,7113],{},"主力编程 → Claude Sonnet 4",[571,7115,7116],{},"推理 \u002F Agent → GPT-5 或 Sonnet 4",[26,7118,825],{"id":825},[568,7120,7121,7127,7135,7143,7154,7160],{},[571,7122,7123,7126],{},[123,7124,7125],{},"200K 价格断点","：超过就翻倍。Prompt 设计时尽量切在 200K 以内。",[571,7128,7129,7134],{},[123,7130,7131,7133],{},[139,7132,6103],{}," 默认开","：意味着即使简单任务也可能多花 token。批量场景显式设 0。",[571,7136,7137,7142],{},[123,7138,7139,7141],{},[139,7140,6808],{}," 不设会截断","：默认 8192，长输出务必显式调高到 65536。",[571,7144,7145,7150,7151,7153],{},[123,7146,7147,7149],{},[139,7148,6113],{}," 不够","：还要配 ",[139,7152,6843],{}," 才是强约束，否则模型可能输出\"\u002F\u002F 注释\"破坏 JSON。",[571,7155,7156,7159],{},[123,7157,7158],{},"国内中转质量参差","：OpenRouter 是相对稳定的选择；自建中转要注意 Google 的速率限制。",[571,7161,7162,7165,7166,7169],{},[123,7163,7164],{},"Cache 存储费","：不用了记得 ",[139,7167,7168],{},"caches.delete()","，否则按小时持续扣费。",[26,7171,854],{"id":854},[568,7173,7174,7179,7184,7189],{},[571,7175,7176,7177],{},"轻量版本：",[861,7178,665],{"href":6453},[571,7180,7181,7182],{},"上下文管理：",[861,7183,883],{"href":882},[571,7185,7186,7187],{},"多模态基础：",[861,7188,5371],{"href":5370},[571,7190,7191,7192],{},"工具调用：",[861,7193,2676],{"href":1771},[885,7195,7196],{},"html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}",{"title":137,"searchDepth":174,"depth":174,"links":7198},[7199,7200,7206,7210,7211,7212,7213,7214,7215,7216],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":7201},[7202,7203,7204,7205],{"id":5527,"depth":174,"text":5528},{"id":2568,"depth":174,"text":2568},{"id":6329,"depth":174,"text":6330},{"id":6447,"depth":174,"text":6448},{"id":129,"depth":162,"text":130,"children":7207},[7208,7209],{"id":6473,"depth":174,"text":6474},{"id":6601,"depth":174,"text":6602},{"id":2222,"depth":162,"text":2222},{"id":492,"depth":162,"text":492},{"id":6928,"depth":162,"text":6928},{"id":6991,"depth":162,"text":6991},{"id":7011,"depth":162,"text":7011},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"Google Gemini 2.5 Pro 旗舰多模态模型，100 万 token 业界最长上下文，图像 \u002F 音频 \u002F 视频 \u002F 代码原生混合推理，Vertex AI 企业级 SLA 集成，Flash 版本提供 1\u002F16 价格兜底。",{},"\u002Fmodels\u002Fgemini-2.5-pro","Input $1.25\u002FM · Output $10\u002FM · 闪存 $0.075\u002FM",[917,3699],"2025-03-25",{"title":1561,"description":7217},"gemini-2.5-pro","models\u002Fgemini-2.5-pro",[7227,7228,7229,7230,7231],"100 万 token 上下文，全网最长，可处理整本书\u002F整个代码仓库","多模态原生支持（图片\u002F视频\u002F音频\u002F代码）","Flash 版本极快极便宜，适合高吞吐场景","Google 生态集成（Vertex AI \u002F AI Studio）","视频理解能力业界最强",[7233,7234,7235,7236],"超长文档\u002F书籍\u002F代码仓库分析","视频内容理解与总结","多模态应用开发","高吞吐低成本场景（Flash 版本）",[7238,7239,7240,7241],"国内无法直连，需要海外网络","编程实操中不如 Claude Sonnet 4 稳定","API 文档和生态不如 OpenAI\u002FAnthropic 完善","长上下文下有'中间遗忘'现象","eZd7lsemIRUzU2r9EjLKYC-zLT_6T1UqdVcIji-PGbw",{"id":7244,"title":2504,"apiCompatible":7245,"benchmarks":7246,"body":7252,"category":2720,"contextWindow":3692,"description":8191,"extension":910,"maxOutput":1706,"meta":8192,"navigation":177,"path":8193,"pricing":8194,"published":915,"relatedTools":8195,"releaseDate":8198,"seo":8199,"slug":8200,"stem":8201,"strengths":8202,"updated":915,"useCases":8208,"vendor":8213,"vendorEn":8214,"weaknesses":8215,"__hash__":8220},"models\u002Fmodels\u002Fglm-5.2.md",[2752],[7247,7248,7249,7251],{"name":14,"score":2521},{"name":17,"score":5191},{"name":20,"score":7250},"85.6%",{"name":3733,"score":5205},{"type":23,"value":7253,"toc":8171},[7254,7256,7259,7261,7263,7266,7269,7272,7274,7291,7295,7307,7309,7312,7472,7476,7606,7620,7622,7692,7710,7712,7752,7755,7758,7803,7805,7808,7872,7875,7878,7981,7985,7996,7999,8096,8098,8147,8149,8169],[26,7255,28],{"id":28},[30,7257,7258],{},"GLM-5.2 是智谱 AI 于 2026 年 6 月发布的旗舰模型，定位为国产最强编程模型。SWE-bench Verified 65.3%，接近 Claude Sonnet 4 水平。最大优势是国内直连、价格极低、免费 API 额度慷慨。",[26,7260,35],{"id":35},[37,7262,1471],{"id":1471},[30,7264,7265],{},"SWE-bench Verified 65.3%，在国产模型中领先。在中文代码注释理解、中文技术文档生成方面优于海外模型。Cursor、Trae、CodeGeeX 等 IDE 已接入。",[37,7267,7268],{"id":7268},"中文理解",[30,7270,7271],{},"CMMLU 92.3%，中文场景下的指令理解、上下文推理明显优于 GPT-5 和 Claude Sonnet 4。对于中文为主的开发团队，GLM-5.2 是更自然的选择。同样的 prompt 翻译成中文喂给 GLM，效果通常优于把英文 prompt 喂给 Claude。",[37,7273,2598],{"id":2598},[568,7275,7276,7279,7282,7285,7288],{},[571,7277,7278],{},"API 服务器在国内，延迟 \u003C 100ms（vs Claude 中转 500ms+）",[571,7280,7281],{},"无需翻墙、无需中转",[571,7283,7284],{},"支持支付宝 \u002F 微信付款",[571,7286,7287],{},"企业可申请私有化部署",[571,7289,7290],{},"备案合规，可上正式生产环境",[37,7292,7294],{"id":7293},"agent-能力","Agent 能力",[30,7296,7297,7298,7301,7302,7306],{},"支持 ",[861,7299,7300],{"href":1771},"function calling"," 和 ",[861,7303,7305],{"href":7304},"\u002Fwiki\u002Fmcp.html","MCP 协议","，可在 Coze、Dify、元器等 Agent 平台上使用。多步工具调用稳定性接近 Claude Sonnet 4 的 80%，已能胜任大部分企业 Agent 场景。",[26,7308,130],{"id":129},[30,7310,7311],{},"GLM 提供 OpenAI 兼容接口，最低改动量从 OpenAI 迁过来：",[132,7313,7315],{"className":134,"code":7314,"language":136,"meta":137,"style":137},"from openai import OpenAI\n\nclient = OpenAI(\n    api_key=\"...\",\n    base_url=\"https:\u002F\u002Fopen.bigmodel.cn\u002Fapi\u002Fpaas\u002Fv4\u002F\",\n)\n\nresp = client.chat.completions.create(\n    model=\"glm-5.2\",\n    temperature=0,\n    messages=[\n        {\"role\": \"system\", \"content\": \"你是资深 Python 工程师。\"},\n        {\"role\": \"user\", \"content\": \"重构以下代码...\"},\n    ],\n    tools=[...],   # function calling\n)\n",[139,7316,7317,7327,7331,7339,7349,7360,7364,7368,7376,7387,7397,7405,7426,7447,7451,7468],{"__ignoreMap":137},[142,7318,7319,7321,7323,7325],{"class":144,"line":145},[142,7320,149],{"class":148},[142,7322,3061],{"class":152},[142,7324,156],{"class":148},[142,7326,3066],{"class":152},[142,7328,7329],{"class":144,"line":162},[142,7330,178],{"emptyLinePlaceholder":177},[142,7332,7333,7335,7337],{"class":144,"line":174},[142,7334,165],{"class":152},[142,7336,168],{"class":148},[142,7338,3075],{"class":152},[142,7340,7341,7343,7345,7347],{"class":144,"line":181},[142,7342,3080],{"class":200},[142,7344,168],{"class":148},[142,7346,4815],{"class":206},[142,7348,210],{"class":152},[142,7350,7351,7353,7355,7358],{"class":144,"line":188},[142,7352,3092],{"class":200},[142,7354,168],{"class":148},[142,7356,7357],{"class":206},"\"https:\u002F\u002Fopen.bigmodel.cn\u002Fapi\u002Fpaas\u002Fv4\u002F\"",[142,7359,210],{"class":152},[142,7361,7362],{"class":144,"line":197},[142,7363,480],{"class":152},[142,7365,7366],{"class":144,"line":5},[142,7367,178],{"emptyLinePlaceholder":177},[142,7369,7370,7372,7374],{"class":144,"line":230},[142,7371,1007],{"class":152},[142,7373,168],{"class":148},[142,7375,2874],{"class":152},[142,7377,7378,7380,7382,7385],{"class":144,"line":243},[142,7379,201],{"class":200},[142,7381,168],{"class":148},[142,7383,7384],{"class":206},"\"glm-5.2\"",[142,7386,210],{"class":152},[142,7388,7389,7391,7393,7395],{"class":144,"line":272},[142,7390,233],{"class":200},[142,7392,168],{"class":148},[142,7394,238],{"class":220},[142,7396,210],{"class":152},[142,7398,7399,7401,7403],{"class":144,"line":284},[142,7400,246],{"class":200},[142,7402,168],{"class":148},[142,7404,342],{"class":152},[142,7406,7407,7409,7411,7413,7415,7417,7419,7421,7424],{"class":144,"line":299},[142,7408,1292],{"class":152},[142,7410,254],{"class":206},[142,7412,257],{"class":152},[142,7414,3990],{"class":206},[142,7416,263],{"class":152},[142,7418,266],{"class":206},[142,7420,257],{"class":152},[142,7422,7423],{"class":206},"\"你是资深 Python 工程师。\"",[142,7425,1064],{"class":152},[142,7427,7428,7430,7432,7434,7436,7438,7440,7442,7445],{"class":144,"line":471},[142,7429,1292],{"class":152},[142,7431,254],{"class":206},[142,7433,257],{"class":152},[142,7435,260],{"class":206},[142,7437,263],{"class":152},[142,7439,266],{"class":206},[142,7441,257],{"class":152},[142,7443,7444],{"class":206},"\"重构以下代码...\"",[142,7446,1064],{"class":152},[142,7448,7449],{"class":144,"line":477},[142,7450,1321],{"class":152},[142,7452,7453,7456,7458,7460,7462,7465],{"class":144,"line":483},[142,7454,7455],{"class":200},"    tools",[142,7457,168],{"class":148},[142,7459,3804],{"class":152},[142,7461,1939],{"class":220},[142,7463,7464],{"class":152},"],   ",[142,7466,7467],{"class":184},"# function calling\n",[142,7469,7470],{"class":144,"line":3185},[142,7471,480],{"class":152},[37,7473,7475],{"id":7474},"原生-sdkzhipuai","原生 SDK（zhipuai）",[132,7477,7479],{"className":134,"code":7478,"language":136,"meta":137,"style":137},"from zhipuai import ZhipuAI\nclient = ZhipuAI(api_key=\"...\")\n\nresp = client.chat.completions.create(\n    model=\"glm-5.2\",\n    messages=[{\"role\": \"user\", \"content\": \"...\"}],\n    tools=[{\n        \"type\": \"web_search\",      # GLM 内置工具：联网搜索\n        \"web_search\": {\"enable\": True}\n    }],\n)\n",[139,7480,7481,7493,7510,7514,7522,7532,7556,7565,7581,7597,7602],{"__ignoreMap":137},[142,7482,7483,7485,7488,7490],{"class":144,"line":145},[142,7484,149],{"class":148},[142,7486,7487],{"class":152}," zhipuai ",[142,7489,156],{"class":148},[142,7491,7492],{"class":152}," ZhipuAI\n",[142,7494,7495,7497,7499,7502,7504,7506,7508],{"class":144,"line":162},[142,7496,165],{"class":152},[142,7498,168],{"class":148},[142,7500,7501],{"class":152}," ZhipuAI(",[142,7503,1836],{"class":200},[142,7505,168],{"class":148},[142,7507,4815],{"class":206},[142,7509,480],{"class":152},[142,7511,7512],{"class":144,"line":174},[142,7513,178],{"emptyLinePlaceholder":177},[142,7515,7516,7518,7520],{"class":144,"line":181},[142,7517,1007],{"class":152},[142,7519,168],{"class":148},[142,7521,2874],{"class":152},[142,7523,7524,7526,7528,7530],{"class":144,"line":188},[142,7525,201],{"class":200},[142,7527,168],{"class":148},[142,7529,7384],{"class":206},[142,7531,210],{"class":152},[142,7533,7534,7536,7538,7540,7542,7544,7546,7548,7550,7552,7554],{"class":144,"line":197},[142,7535,246],{"class":200},[142,7537,168],{"class":148},[142,7539,251],{"class":152},[142,7541,254],{"class":206},[142,7543,257],{"class":152},[142,7545,260],{"class":206},[142,7547,263],{"class":152},[142,7549,266],{"class":206},[142,7551,257],{"class":152},[142,7553,4815],{"class":206},[142,7555,442],{"class":152},[142,7557,7558,7560,7562],{"class":144,"line":5},[142,7559,7455],{"class":200},[142,7561,168],{"class":148},[142,7563,7564],{"class":152},"[{\n",[142,7566,7567,7570,7572,7575,7578],{"class":144,"line":230},[142,7568,7569],{"class":206},"        \"type\"",[142,7571,257],{"class":152},[142,7573,7574],{"class":206},"\"web_search\"",[142,7576,7577],{"class":152},",      ",[142,7579,7580],{"class":184},"# GLM 内置工具：联网搜索\n",[142,7582,7583,7586,7588,7591,7593,7595],{"class":144,"line":243},[142,7584,7585],{"class":206},"        \"web_search\"",[142,7587,2052],{"class":152},[142,7589,7590],{"class":206},"\"enable\"",[142,7592,257],{"class":152},[142,7594,2217],{"class":220},[142,7596,1316],{"class":152},[142,7598,7599],{"class":144,"line":272},[142,7600,7601],{"class":152},"    }],\n",[142,7603,7604],{"class":144,"line":284},[142,7605,480],{"class":152},[30,7607,7608,7609,7612,7613,7612,7616,7619],{},"GLM 原生 SDK 额外提供了一些 OpenAI 兼容接口里没有的能力，比如 ",[123,7610,7611],{},"内置联网搜索工具","、",[123,7614,7615],{},"code interpreter",[123,7617,7618],{},"glyph 图片理解","等。",[26,7621,2222],{"id":2222},[47,7623,7624,7634],{},[50,7625,7626],{},[53,7627,7628,7630,7632],{},[56,7629,2231],{},[56,7631,713],{},[56,7633,4109],{},[66,7635,7636,7647,7657,7669,7680],{},[53,7637,7638,7642,7645],{},[71,7639,7640],{},[139,7641,1344],{},[71,7643,7644],{},"0.1-0.3",[71,7646,4123],{},[53,7648,7649,7653,7655],{},[71,7650,7651],{},[139,7652,1344],{},[71,7654,2258],{},[71,7656,2261],{},[53,7658,7659,7663,7666],{},[71,7660,7661],{},[139,7662,4140],{},[71,7664,7665],{},"0.7（默认）",[71,7667,7668],{},"不要同时调 temperature 和 top_p",[53,7670,7671,7675,7677],{},[71,7672,7673],{},[139,7674,2279],{},[71,7676,4155],{},[71,7678,7679],{},"默认值小，长输出务必调高",[53,7681,7682,7687,7689],{},[71,7683,7684],{},[139,7685,7686],{},"do_sample",[71,7688,2217],{},[71,7690,7691],{},"False 时退化为 greedy",[30,7693,7694,7695,7697,7698,7701,7702,7705,7706,7709],{},"注意 GLM 的 ",[139,7696,1344],{}," 取值范围是 ",[139,7699,7700],{},"[0.0, 1.0]","（不是 OpenAI 的 0-2），且 ",[123,7703,7704],{},"0 不允许","（要传 ",[139,7707,7708],{},"do_sample=False"," 才能 greedy）。",[26,7711,492],{"id":492},[47,7713,7714,7722],{},[50,7715,7716],{},[53,7717,7718,7720],{},[56,7719,501],{},[56,7721,1362],{},[66,7723,7724,7730,7737,7744],{},[53,7725,7726,7728],{},[71,7727,515],{},[71,7729,4218],{},[53,7731,7732,7734],{},[71,7733,529],{},[71,7735,7736],{},"¥6 \u002F 百万 token",[53,7738,7739,7741],{},[71,7740,5049],{},[71,7742,7743],{},"个人开发者每月 100 万 token（Free Tier）",[53,7745,7746,7749],{},[71,7747,7748],{},"企业批量优惠",[71,7750,7751],{},"月消费 > 10 万元有阶梯折扣",[30,7753,7754],{},"对比 Claude Sonnet 4（Input $3 ≈ ¥21\u002FM），GLM-5.2 的 Input 价格仅为其 1\u002F10。对于预算有限的国内开发者，性价比极高。",[26,7756,7757],{"id":7757},"在哪些工具中可以用到",[568,7759,7760,7765,7771,7777,7782,7788,7793],{},[571,7761,7762,7764],{},[123,7763,4713],{},"（字节 AI IDE）— 默认模型之一",[571,7766,7767,7770],{},[123,7768,7769],{},"CodeGeeX","（智谱自家的 Copilot 插件）— 原生支持",[571,7772,7773,7776],{},[123,7774,7775],{},"CodeBuddy","（腾讯 AI 编程助手）— 已接入",[571,7778,7779,7781],{},[123,7780,2451],{},"（Agent 平台）— 后端模型选项",[571,7783,7784,7787],{},[123,7785,7786],{},"Dify"," — OpenAI 兼容接入",[571,7789,7790,7792],{},[123,7791,4312],{}," — 通过自定义 OpenAI 兼容 API 接入（settings → models）",[571,7794,7795,7798,7799,7802],{},[123,7796,7797],{},"Cline \u002F Continue"," — 配置 ",[139,7800,7801],{},"baseURL"," 即可",[26,7804,4923],{"id":4923},[30,7806,7807],{},"智谱 GLM 系列还有几个变种：",[47,7809,7810,7820],{},[50,7811,7812],{},[53,7813,7814,7816,7818],{},[56,7815,619],{},[56,7817,4934],{},[56,7819,1362],{},[66,7821,7822,7830,7841,7852,7862],{},[53,7823,7824,7826,7828],{},[71,7825,2504],{},[71,7827,4948],{},[71,7829,4359],{},[53,7831,7832,7835,7838],{},[71,7833,7834],{},"GLM-5.2-Air",[71,7836,7837],{},"轻量快速",[71,7839,7840],{},"¥0.5\u002F¥1.5",[53,7842,7843,7846,7849],{},[71,7844,7845],{},"GLM-5.2-Long",[71,7847,7848],{},"1M 上下文",[71,7850,7851],{},"¥2.5\u002F¥7",[53,7853,7854,7857,7860],{},[71,7855,7856],{},"GLM-5.2-Flash",[71,7858,7859],{},"免费",[71,7861,238],{},[53,7863,7864,7867,7869],{},[71,7865,7866],{},"GLM-5.2-Vision",[71,7868,2568],{},[71,7870,7871],{},"¥3\u002F¥10",[30,7873,7874],{},"混搭使用：主力 GLM-5.2，简单任务降级到 Air，超长文档切换 Long，原型验证用免费的 Flash。",[26,7876,7877],{"id":7877},"与海外模型怎么选",[47,7879,7880,7892],{},[50,7881,7882],{},[53,7883,7884,7886,7888,7890],{},[56,7885,1451],{},[56,7887,2504],{},[56,7889,864],{},[56,7891,1558],{},[66,7893,7894,7905,7915,7927,7939,7949,7959,7970],{},[53,7895,7896,7899,7901,7903],{},[71,7897,7898],{},"编程能力",[71,7900,692],{},[71,7902,644],{},[71,7904,692],{},[53,7906,7907,7909,7911,7913],{},[71,7908,7268],{},[71,7910,644],{},[71,7912,692],{},[71,7914,692],{},[53,7916,7917,7919,7922,7925],{},[71,7918,6131],{},[71,7920,7921],{},"✅ 直连",[71,7923,7924],{},"❌ 需中转",[71,7926,7924],{},[53,7928,7929,7932,7934,7937],{},[71,7930,7931],{},"价格（Input）",[71,7933,686],{},[71,7935,7936],{},"¥21\u002FM",[71,7938,4994],{},[53,7940,7941,7943,7945,7947],{},[71,7942,2526],{},[71,7944,2537],{},[71,7946,2529],{},[71,7948,2409],{},[53,7950,7951,7953,7955,7957],{},[71,7952,2555],{},[71,7954,692],{},[71,7956,644],{},[71,7958,644],{},[53,7960,7961,7964,7966,7968],{},[71,7962,7963],{},"生态成熟度",[71,7965,1601],{},[71,7967,644],{},[71,7969,644],{},[53,7971,7972,7975,7977,7979],{},[71,7973,7974],{},"合规备案",[71,7976,1522],{},[71,7978,2601],{},[71,7980,2601],{},[30,7982,7983,2617],{},[123,7984,3464],{},[568,7986,7987,7990,7993],{},[571,7988,7989],{},"国内日常开发 \u002F 备案合规生产 → GLM-5.2 性价比最高",[571,7991,7992],{},"复杂 Agent \u002F 大型项目 → Claude Sonnet 4",[571,7994,7995],{},"多模态视频 → Gemini 2.5 Pro",[26,7997,7998],{"id":7998},"与同档国产模型对比",[47,8000,8001,8015],{},[50,8002,8003],{},[53,8004,8005,8007,8009,8011,8013],{},[56,8006,1451],{},[56,8008,2504],{},[56,8010,680],{},[56,8012,4347],{},[56,8014,4552],{},[66,8016,8017,8029,8041,8053,8069,8083],{},[53,8018,8019,8021,8023,8025,8027],{},[71,8020,626],{},[71,8022,2521],{},[71,8024,689],{},[71,8026,4373],{},[71,8028,1622],{},[53,8030,8031,8033,8035,8037,8039],{},[71,8032,5227],{},[71,8034,686],{},[71,8036,683],{},[71,8038,4951],{},[71,8040,4951],{},[53,8042,8043,8045,8047,8049,8051],{},[71,8044,2526],{},[71,8046,2537],{},[71,8048,2537],{},[71,8050,2537],{},[71,8052,4601],{},[53,8054,8055,8058,8061,8064,8066],{},[71,8056,8057],{},"编程定位",[71,8059,8060],{},"强",[71,8062,8063],{},"中强",[71,8065,1622],{},[71,8067,8068],{},"一般",[53,8070,8071,8074,8076,8079,8081],{},[71,8072,8073],{},"中文",[71,8075,644],{},[71,8077,8078],{},"★★★★",[71,8080,8078],{},[71,8082,8078],{},[53,8084,8085,8087,8089,8092,8094],{},[71,8086,1777],{},[71,8088,8078],{},[71,8090,8091],{},"★★★",[71,8093,8091],{},[71,8095,8091],{},[26,8097,825],{"id":825},[568,8099,8100,8111,8121,8127,8133],{},[571,8101,8102,8107,8108,8110],{},[123,8103,8104,8106],{},[139,8105,2304],{}," 不支持","：要 greedy 解码必须 ",[139,8109,7708],{},"，否则直接报错。",[571,8112,8113,8116,8117,8120],{},[123,8114,8115],{},"base_url 末尾的斜杠","：很多 SDK 对斜杠敏感，统一用 ",[139,8118,8119],{},"https:\u002F\u002Fopen.bigmodel.cn\u002Fapi\u002Fpaas\u002Fv4\u002F","（带尾斜杠）。",[571,8122,8123,8126],{},[123,8124,8125],{},"免费额度有用量上限","：单分钟 RPM 较低，生产场景一定要升级到付费档。",[571,8128,8129,8132],{},[123,8130,8131],{},"联网搜索工具不便宜","：每次触发 web_search 单独计费，批量使用要看清账单。",[571,8134,8135,8138,8139,8142,8143,126],{},[123,8136,8137],{},"工具调用格式","：GLM-5.2 大体兼容 OpenAI tools 格式，但对 ",[139,8140,8141],{},"strict: true"," 模式支持不如 GPT-5，复杂 schema 建议用 ",[861,8144,8146],{"href":8145},"\u002Fwiki\u002Ffunction-calling.html#%E7%BB%93%E6%9E%84%E5%8C%96%E8%BE%93%E5%87%BA%E4%B8%8E%E6%A0%A1%E9%AA%8C","Pydantic + 二次校验",[26,8148,854],{"id":854},[568,8150,8151,8158,8164],{},[571,8152,2673,8153,865,8155],{},[861,8154,2676],{"href":1771},[861,8156,8157],{"href":7304},"MCP",[571,8159,6144,8160,865,8162],{},[861,8161,680],{"href":3653},[861,8163,4347],{"href":4489},[571,8165,8166,8167],{},"海外对标：",[861,8168,864],{"href":863},[885,8170,2697],{},{"title":137,"searchDepth":174,"depth":174,"links":8172},[8173,8174,8180,8183,8184,8185,8186,8187,8188,8189,8190],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":8175},[8176,8177,8178,8179],{"id":1471,"depth":174,"text":1471},{"id":7268,"depth":174,"text":7268},{"id":2598,"depth":174,"text":2598},{"id":7293,"depth":174,"text":7294},{"id":129,"depth":162,"text":130,"children":8181},[8182],{"id":7474,"depth":174,"text":7475},{"id":2222,"depth":162,"text":2222},{"id":492,"depth":162,"text":492},{"id":7757,"depth":162,"text":7757},{"id":4923,"depth":162,"text":4923},{"id":7877,"depth":162,"text":7877},{"id":7998,"depth":162,"text":7998},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"智谱 AI GLM-5.2 旗舰大模型（2026 年发布），国产编程与 Agent 工具调用能力第一梯队，国内直连零延迟，输入 ¥2\u002FM 价格友好，提供慷慨免费 API 额度与开源权重，国内开发者主力选项。",{},"\u002Fmodels\u002Fglm-5.2","Input ¥2\u002FM · Output ¥6\u002FM · 闭源免费 API 额度",[2727,8196,8197,5403],"coding\u002Fcopilot\u002Fcodegeex","coding\u002Fcopilot\u002Fcodebuddy","2026-06-15",{"title":2504,"description":8191},"glm-5.2","models\u002Fglm-5.2",[8203,8204,8205,8206,8207],"国产模型编程能力最强之一，中文代码注释理解优秀","国内直连无需翻墙，API 响应快","价格仅为 Claude\u002FGPT 的 1\u002F10","免费 API 额度慷慨，个人开发者友好","中文场景下指令理解优于海外模型",[8209,8210,8211,8212],"国内开发者主力编程模型","中文场景 AI 应用开发","企业私有化部署（支持开源版本）","Agent 平台（Coze \u002F 元器 \u002F Dify）后端模型","智谱 AI","Zhipu AI",[8216,8217,8218,8219],"128K 上下文小于 Claude Sonnet 4（200K）和 GPT-5（400K）","复杂多步 Agent 场景下不如 Claude 稳定","英文长文档推理能力略逊 GPT-5","生态（MCP\u002F工具链）不如 Anthropic\u002FOpenAI 成熟","0iuXaKn9RPAn0qZ0voleZJrtLOF8iyoPrm50aVzGF-4",{"id":8222,"title":2845,"apiCompatible":8223,"benchmarks":8224,"body":8231,"category":6180,"contextWindow":3692,"description":9091,"extension":910,"maxOutput":911,"meta":9092,"navigation":177,"path":9093,"pricing":9094,"published":915,"relatedTools":9095,"releaseDate":9097,"seo":9098,"slug":8908,"stem":9099,"strengths":9100,"updated":915,"useCases":9106,"vendor":9111,"vendorEn":9111,"weaknesses":9112,"__hash__":9116},"models\u002Fmodels\u002Fgpt-4o.md",[2752],[8225,8227,8228,8229],{"name":14,"score":8226},"45.2%",{"name":17,"score":2827},{"name":20,"score":1739},{"name":952,"score":8230},"49.9%",{"type":23,"value":8232,"toc":9073},[8233,8235,8238,8240,8242,8245,8247,8250,8253,8263,8348,8350,8616,8632,8636,8753,8756,8758,8821,8824,8830,8834,8866,8870,8884,8888,8952,8955,8962,8965,8984,8986,9045,9047,9071],[26,8234,28],{"id":28},[30,8236,8237],{},"GPT-4o（\"o\" 代表 omni）是 OpenAI 于 2024 年 5 月发布的多模态模型。虽然 GPT-5 已发布，但 GPT-4o 因其出色的性价比、成熟的生态和稳定的性能，仍然是大量应用的首选模型。",[26,8239,35],{"id":35},[37,8241,2568],{"id":2568},[30,8243,8244],{},"GPT-4o 原生支持文本、图片、音频输入。音频处理能力是其独特优势——可以直接理解语音内容并生成语音回复，延迟仅 232ms。Realtime API（GPT-4o-realtime）支持双向语音流，是目前实时语音 Agent 的事实标准。",[37,8246,629],{"id":629},[30,8248,8249],{},"GPT-4o 的响应速度是 GPT-4 Turbo 的 2 倍。对于需要快速响应的场景（实时聊天、流式输出），GPT-4o 体验最好。",[37,8251,8252],{"id":8252},"函数调用",[30,8254,8255,8256,8258,8259,8262],{},"GPT-4o 的",[861,8257,8252],{"href":1771},"（function calling）非常稳定。在 Agent 场景下，工具调用的格式正确率和意图识别准确率都是业界领先。支持 ",[123,8260,8261],{},"parallel tool calls"," —— 一次返回多个独立调用：",[132,8264,8266],{"className":134,"code":8265,"language":136,"meta":137,"style":137},"resp = client.chat.completions.create(\n    model=\"gpt-4o\",\n    parallel_tool_calls=True,    # 默认开启\n    tools=[...],\n    messages=[{\"role\": \"user\", \"content\": \"对比上海和北京天气\"}],\n)\n# resp.choices[0].message.tool_calls 会同时包含两个 get_weather 调用\n",[139,8267,8268,8276,8287,8302,8314,8339,8343],{"__ignoreMap":137},[142,8269,8270,8272,8274],{"class":144,"line":145},[142,8271,1007],{"class":152},[142,8273,168],{"class":148},[142,8275,2874],{"class":152},[142,8277,8278,8280,8282,8285],{"class":144,"line":162},[142,8279,201],{"class":200},[142,8281,168],{"class":148},[142,8283,8284],{"class":206},"\"gpt-4o\"",[142,8286,210],{"class":152},[142,8288,8289,8292,8294,8296,8299],{"class":144,"line":174},[142,8290,8291],{"class":200},"    parallel_tool_calls",[142,8293,168],{"class":148},[142,8295,2217],{"class":220},[142,8297,8298],{"class":152},",    ",[142,8300,8301],{"class":184},"# 默认开启\n",[142,8303,8304,8306,8308,8310,8312],{"class":144,"line":181},[142,8305,7455],{"class":200},[142,8307,168],{"class":148},[142,8309,3804],{"class":152},[142,8311,1939],{"class":220},[142,8313,3809],{"class":152},[142,8315,8316,8318,8320,8322,8324,8326,8328,8330,8332,8334,8337],{"class":144,"line":188},[142,8317,246],{"class":200},[142,8319,168],{"class":148},[142,8321,251],{"class":152},[142,8323,254],{"class":206},[142,8325,257],{"class":152},[142,8327,260],{"class":206},[142,8329,263],{"class":152},[142,8331,266],{"class":206},[142,8333,257],{"class":152},[142,8335,8336],{"class":206},"\"对比上海和北京天气\"",[142,8338,442],{"class":152},[142,8340,8341],{"class":144,"line":197},[142,8342,480],{"class":152},[142,8344,8345],{"class":144,"line":5},[142,8346,8347],{"class":184},"# resp.choices[0].message.tool_calls 会同时包含两个 get_weather 调用\n",[26,8349,130],{"id":129},[132,8351,8353],{"className":134,"code":8352,"language":136,"meta":137,"style":137},"from openai import OpenAI\nclient = OpenAI()\n\n# 文本\nresp = client.chat.completions.create(\n    model=\"gpt-4o\",\n    temperature=0,           # 工具调用建议 0\n    messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": \"Hello\"},\n    ],\n)\n\n# 图片输入\nresp = client.chat.completions.create(\n    model=\"gpt-4o\",\n    messages=[{\n        \"role\": \"user\",\n        \"content\": [\n            {\"type\": \"text\", \"text\": \"这张截图里有什么 bug？\"},\n            {\"type\": \"image_url\", \"image_url\": {\n                \"url\": \"https:\u002F\u002Fexample.com\u002Fscreenshot.png\",\n                \"detail\": \"high\",   # low\u002Fhigh\u002Fauto\n            }}\n        ],\n    }],\n)\n",[139,8354,8355,8365,8374,8378,8383,8391,8401,8414,8422,8443,8464,8468,8472,8476,8481,8489,8499,8507,8518,8526,8548,8565,8578,8594,8600,8606,8611],{"__ignoreMap":137},[142,8356,8357,8359,8361,8363],{"class":144,"line":145},[142,8358,149],{"class":148},[142,8360,3061],{"class":152},[142,8362,156],{"class":148},[142,8364,3066],{"class":152},[142,8366,8367,8369,8371],{"class":144,"line":162},[142,8368,165],{"class":152},[142,8370,168],{"class":148},[142,8372,8373],{"class":152}," OpenAI()\n",[142,8375,8376],{"class":144,"line":174},[142,8377,178],{"emptyLinePlaceholder":177},[142,8379,8380],{"class":144,"line":181},[142,8381,8382],{"class":184},"# 文本\n",[142,8384,8385,8387,8389],{"class":144,"line":188},[142,8386,1007],{"class":152},[142,8388,168],{"class":148},[142,8390,2874],{"class":152},[142,8392,8393,8395,8397,8399],{"class":144,"line":197},[142,8394,201],{"class":200},[142,8396,168],{"class":148},[142,8398,8284],{"class":206},[142,8400,210],{"class":152},[142,8402,8403,8405,8407,8409,8411],{"class":144,"line":5},[142,8404,233],{"class":200},[142,8406,168],{"class":148},[142,8408,238],{"class":220},[142,8410,224],{"class":152},[142,8412,8413],{"class":184},"# 工具调用建议 0\n",[142,8415,8416,8418,8420],{"class":144,"line":230},[142,8417,246],{"class":200},[142,8419,168],{"class":148},[142,8421,342],{"class":152},[142,8423,8424,8426,8428,8430,8432,8434,8436,8438,8441],{"class":144,"line":243},[142,8425,1292],{"class":152},[142,8427,254],{"class":206},[142,8429,257],{"class":152},[142,8431,3990],{"class":206},[142,8433,263],{"class":152},[142,8435,266],{"class":206},[142,8437,257],{"class":152},[142,8439,8440],{"class":206},"\"You are a helpful assistant.\"",[142,8442,1064],{"class":152},[142,8444,8445,8447,8449,8451,8453,8455,8457,8459,8462],{"class":144,"line":272},[142,8446,1292],{"class":152},[142,8448,254],{"class":206},[142,8450,257],{"class":152},[142,8452,260],{"class":206},[142,8454,263],{"class":152},[142,8456,266],{"class":206},[142,8458,257],{"class":152},[142,8460,8461],{"class":206},"\"Hello\"",[142,8463,1064],{"class":152},[142,8465,8466],{"class":144,"line":284},[142,8467,1321],{"class":152},[142,8469,8470],{"class":144,"line":299},[142,8471,480],{"class":152},[142,8473,8474],{"class":144,"line":471},[142,8475,178],{"emptyLinePlaceholder":177},[142,8477,8478],{"class":144,"line":477},[142,8479,8480],{"class":184},"# 图片输入\n",[142,8482,8483,8485,8487],{"class":144,"line":483},[142,8484,1007],{"class":152},[142,8486,168],{"class":148},[142,8488,2874],{"class":152},[142,8490,8491,8493,8495,8497],{"class":144,"line":3185},[142,8492,201],{"class":200},[142,8494,168],{"class":148},[142,8496,8284],{"class":206},[142,8498,210],{"class":152},[142,8500,8501,8503,8505],{"class":144,"line":3190},[142,8502,246],{"class":200},[142,8504,168],{"class":148},[142,8506,7564],{"class":152},[142,8508,8509,8512,8514,8516],{"class":144,"line":3206},[142,8510,8511],{"class":206},"        \"role\"",[142,8513,257],{"class":152},[142,8515,260],{"class":206},[142,8517,210],{"class":152},[142,8519,8520,8523],{"class":144,"line":3218},[142,8521,8522],{"class":206},"        \"content\"",[142,8524,8525],{"class":152},": [\n",[142,8527,8528,8531,8533,8535,8537,8539,8541,8543,8546],{"class":144,"line":3226},[142,8529,8530],{"class":152},"            {",[142,8532,1046],{"class":206},[142,8534,257],{"class":152},[142,8536,2030],{"class":206},[142,8538,263],{"class":152},[142,8540,2030],{"class":206},[142,8542,257],{"class":152},[142,8544,8545],{"class":206},"\"这张截图里有什么 bug？\"",[142,8547,1064],{"class":152},[142,8549,8550,8552,8554,8556,8559,8561,8563],{"class":144,"line":3242},[142,8551,8530],{"class":152},[142,8553,1046],{"class":206},[142,8555,257],{"class":152},[142,8557,8558],{"class":206},"\"image_url\"",[142,8560,263],{"class":152},[142,8562,8558],{"class":206},[142,8564,382],{"class":152},[142,8566,8568,8571,8573,8576],{"class":144,"line":8567},22,[142,8569,8570],{"class":206},"                \"url\"",[142,8572,257],{"class":152},[142,8574,8575],{"class":206},"\"https:\u002F\u002Fexample.com\u002Fscreenshot.png\"",[142,8577,210],{"class":152},[142,8579,8581,8584,8586,8589,8591],{"class":144,"line":8580},23,[142,8582,8583],{"class":206},"                \"detail\"",[142,8585,257],{"class":152},[142,8587,8588],{"class":206},"\"high\"",[142,8590,2886],{"class":152},[142,8592,8593],{"class":184},"# low\u002Fhigh\u002Fauto\n",[142,8595,8597],{"class":144,"line":8596},24,[142,8598,8599],{"class":152},"            }}\n",[142,8601,8603],{"class":144,"line":8602},25,[142,8604,8605],{"class":152},"        ],\n",[142,8607,8609],{"class":144,"line":8608},26,[142,8610,7601],{"class":152},[142,8612,8614],{"class":144,"line":8613},27,[142,8615,480],{"class":152},[30,8617,8618,2617,8624,8627,8628,8631],{},[123,8619,8620,8623],{},[139,8621,8622],{},"detail"," 参数",[139,8625,8626],{},"low"," 模式所有图片当 512×512 处理，固定 85 token；",[139,8629,8630],{},"high"," 模式根据图片大小切片处理，1024×1024 约 765 token。批量分析海量截图先用 low，需要精细识别再升 high。",[37,8633,8635],{"id":8634},"realtime-api双向语音","Realtime API（双向语音）",[132,8637,8639],{"className":134,"code":8638,"language":136,"meta":137,"style":137},"import asyncio\nfrom openai import AsyncOpenAI\n\nasync def voice_agent():\n    client = AsyncOpenAI()\n    async with client.beta.realtime.connect(model=\"gpt-4o-realtime-preview\") as conn:\n        await conn.session.update(session={\"modalities\": [\"text\", \"audio\"]})\n        # 推送音频流 + 接收音频流\n        ...\n",[139,8640,8641,8648,8659,8663,8677,8687,8712,8743,8748],{"__ignoreMap":137},[142,8642,8643,8645],{"class":144,"line":145},[142,8644,156],{"class":148},[142,8646,8647],{"class":152}," asyncio\n",[142,8649,8650,8652,8654,8656],{"class":144,"line":162},[142,8651,149],{"class":148},[142,8653,3061],{"class":152},[142,8655,156],{"class":148},[142,8657,8658],{"class":152}," AsyncOpenAI\n",[142,8660,8661],{"class":144,"line":174},[142,8662,178],{"emptyLinePlaceholder":177},[142,8664,8665,8668,8671,8674],{"class":144,"line":181},[142,8666,8667],{"class":148},"async",[142,8669,8670],{"class":148}," def",[142,8672,8673],{"class":4074}," voice_agent",[142,8675,8676],{"class":152},"():\n",[142,8678,8679,8682,8684],{"class":144,"line":188},[142,8680,8681],{"class":152},"    client ",[142,8683,168],{"class":148},[142,8685,8686],{"class":152}," AsyncOpenAI()\n",[142,8688,8689,8692,8695,8698,8700,8702,8705,8707,8709],{"class":144,"line":197},[142,8690,8691],{"class":148},"    async",[142,8693,8694],{"class":148}," with",[142,8696,8697],{"class":152}," client.beta.realtime.connect(",[142,8699,4915],{"class":200},[142,8701,168],{"class":148},[142,8703,8704],{"class":206},"\"gpt-4o-realtime-preview\"",[142,8706,275],{"class":152},[142,8708,278],{"class":148},[142,8710,8711],{"class":152}," conn:\n",[142,8713,8714,8717,8720,8723,8725,8727,8730,8733,8735,8737,8740],{"class":144,"line":5},[142,8715,8716],{"class":148},"        await",[142,8718,8719],{"class":152}," conn.session.update(",[142,8721,8722],{"class":200},"session",[142,8724,168],{"class":148},[142,8726,363],{"class":152},[142,8728,8729],{"class":206},"\"modalities\"",[142,8731,8732],{"class":152},": [",[142,8734,2030],{"class":206},[142,8736,263],{"class":152},[142,8738,8739],{"class":206},"\"audio\"",[142,8741,8742],{"class":152},"]})\n",[142,8744,8745],{"class":144,"line":230},[142,8746,8747],{"class":184},"        # 推送音频流 + 接收音频流\n",[142,8749,8750],{"class":144,"line":243},[142,8751,8752],{"class":220},"        ...\n",[30,8754,8755],{},"这是 GPT-4o 区别于其他模型最大的护城河——目前没有任何其他模型在 API 层提供这种端到端语音能力。",[26,8757,492],{"id":492},[47,8759,8760,8771],{},[50,8761,8762],{},[53,8763,8764,8766,8768],{},[56,8765,501],{},[56,8767,1362],{},[56,8769,8770],{},"对比 GPT-5",[66,8772,8773,8782,8791,8801,8811],{},[53,8774,8775,8777,8780],{},[71,8776,515],{},[71,8778,8779],{},"$2.5 \u002F 百万 token",[71,8781,5493],{},[53,8783,8784,8786,8789],{},[71,8785,529],{},[71,8787,8788],{},"$10 \u002F 百万 token",[71,8790,5475],{},[53,8792,8793,8795,8798],{},[71,8794,6890],{},[71,8796,8797],{},"$1.25 \u002F 百万 token",[71,8799,8800],{},"10×",[53,8802,8803,8806,8809],{},[71,8804,8805],{},"Realtime Audio Input",[71,8807,8808],{},"$40 \u002F 百万 token",[71,8810,5008],{},[53,8812,8813,8816,8819],{},[71,8814,8815],{},"Realtime Audio Output",[71,8817,8818],{},"$80 \u002F 百万 token",[71,8820,5008],{},[30,8822,8823],{},"GPT-4o 的 Output 价格与 GPT-5 相同，但 Input 价格高一倍。对于对话密集型场景，GPT-5 更划算；对于简单任务，GPT-4o 够用且更成熟。",[30,8825,8826,8829],{},[123,8827,8828],{},"注意 audio token 单价比 text 高 30 倍","。实时语音应用要算清楚账，月成本动辄上千刀。",[26,8831,8833],{"id":8832},"什么时候用-gpt-4o-而非-gpt-5","什么时候用 GPT-4o 而非 GPT-5",[568,8835,8836,8842,8848,8854,8860],{},[571,8837,8838,8841],{},[123,8839,8840],{},"需要音频处理 \u002F 实时语音"," → GPT-4o（GPT-5 Realtime 还在 preview）",[571,8843,8844,8847],{},[123,8845,8846],{},"工具\u002F框架只支持 GPT-4o 接口"," → 直接用 GPT-4o",[571,8849,8850,8853],{},[123,8851,8852],{},"预算敏感且任务简单"," → GPT-4o 够用（虽然 GPT-5 input 更便宜，但 4o 生态老代码无缝迁移）",[571,8855,8856,8859],{},[123,8857,8858],{},"需要最快响应"," → GPT-4o 首 token 更快",[571,8861,8862,8865],{},[123,8863,8864],{},"大量旧 prompt 已经针对 GPT-4o 调过"," → 切 GPT-5 要重测，没必要急",[26,8867,8869],{"id":8868},"什么时候必须升级到-gpt-5","什么时候必须升级到 GPT-5",[568,8871,8872,8875,8878,8881],{},[571,8873,8874],{},"需要 200K 以上上下文（4o 只有 128K）",[571,8876,8877],{},"复杂推理 \u002F 数学（4o GPQA Diamond 49.9% vs GPT-5 62.5%）",[571,8879,8880],{},"SWE-bench \u002F 编程场景（4o 45.2% vs GPT-5 68.0%，差距巨大）",[571,8882,8883],{},"想用最新 Structured Outputs \u002F reasoning_effort 等特性",[26,8885,8887],{"id":8886},"gpt-4o-变种","GPT-4o 变种",[47,8889,8890,8900],{},[50,8891,8892],{},[53,8893,8894,8897],{},[56,8895,8896],{},"变种",[56,8898,8899],{},"用途",[66,8901,8902,8912,8922,8932,8942],{},[53,8903,8904,8909],{},[71,8905,8906],{},[139,8907,8908],{},"gpt-4o",[71,8910,8911],{},"主力",[53,8913,8914,8919],{},[71,8915,8916],{},[139,8917,8918],{},"gpt-4o-mini",[71,8920,8921],{},"轻量级，Input $0.15\u002FM Output $0.6\u002FM，但 SWE-bench 只有 33%",[53,8923,8924,8929],{},[71,8925,8926],{},[139,8927,8928],{},"gpt-4o-realtime-preview",[71,8930,8931],{},"实时双向语音",[53,8933,8934,8939],{},[71,8935,8936],{},[139,8937,8938],{},"gpt-4o-audio-preview",[71,8940,8941],{},"异步音频输入输出",[53,8943,8944,8949],{},[71,8945,8946],{},[139,8947,8948],{},"gpt-4o-transcribe",[71,8950,8951],{},"仅做 STT 转写",[26,8953,8954],{"id":8954},"限流与国内使用",[30,8956,8957,8958,8961],{},"限流体系与 ",[861,8959,1558],{"href":8960},"\u002Fmodels\u002Fgpt-5.html#%E9%99%90%E6%B5%81"," 相同，按账户 Tier 分档。",[30,8963,8964],{},"国内使用三种方式：",[1416,8966,8967,8972,8978],{},[571,8968,8969,8971],{},[123,8970,2435],{},"（OpenRouter \u002F 自建 Cloudflare Workers）",[571,8973,8974,8977],{},[123,8975,8976],{},"Azure OpenAI","（企业级 SLA，但要海外 Azure 账号）",[571,8979,8980,8983],{},[123,8981,8982],{},"GitHub Models","（小流量免费，仅限实验）",[26,8985,825],{"id":825},[568,8987,8988,8999,9014,9020,9031],{},[571,8989,8990,8995,8996,126],{},[123,8991,8992,8994],{},[139,8993,8918],{}," ≠ 便宜版 4o","：SWE-bench \u002F GPQA 都断崖式下跌，只适合分类 \u002F 抽取这类简单任务，",[123,8997,8998],{},"别拿它写代码",[571,9000,9001,9007,9008,9010,9011,9013],{},[123,9002,9003,9006],{},[139,9004,9005],{},"detail: auto"," 不可控","：图像分析用量大时显式指定 ",[139,9009,8626],{}," 或 ",[139,9012,8630],{},"，否则成本飘忽。",[571,9015,9016,9019],{},[123,9017,9018],{},"Realtime API 费用高","：音频 token 单价 30x 文本，长会话每分钟可能花到一两毛美元，要做限时控制。",[571,9021,9022,9025,9026,9030],{},[123,9023,9024],{},"128K 上下文也有\"中间遗忘\"","：超过 ~80K 后召回率明显下降，长文档分析要靠 ",[861,9027,9029],{"href":9028},"\u002Fwiki\u002Frag.html","RAG"," 而不是硬塞。",[571,9032,9033,9036,9037,9040,9041,9044],{},[123,9034,9035],{},"stream 模式下 tool_calls 拼装","：流式响应里 ",[139,9038,9039],{},"tool_calls"," 是分片到达的，要手动累加 ",[139,9042,9043],{},"arguments"," 字符串再解析 JSON，新手常踩。",[26,9046,854],{"id":854},[568,9048,9049,9054,9058,9064],{},[571,9050,9051,9052],{},"上位替代：",[861,9053,1558],{"href":3669},[571,9055,7191,9056],{},[861,9057,2676],{"href":1771},[571,9059,9060,9061,9063],{},"多模态视角：",[861,9062,5371],{"href":5370},"（图像 embedding 互补）",[571,9065,9066,9067,865,9069],{},"省 token：",[861,9068,876],{"href":875},[861,9070,883],{"href":882},[885,9072,4502],{},{"title":137,"searchDepth":174,"depth":174,"links":9074},[9075,9076,9081,9084,9085,9086,9087,9088,9089,9090],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":9077},[9078,9079,9080],{"id":2568,"depth":174,"text":2568},{"id":629,"depth":174,"text":629},{"id":8252,"depth":174,"text":8252},{"id":129,"depth":162,"text":130,"children":9082},[9083],{"id":8634,"depth":174,"text":8635},{"id":492,"depth":162,"text":492},{"id":8832,"depth":162,"text":8833},{"id":8868,"depth":162,"text":8869},{"id":8886,"depth":162,"text":8887},{"id":8954,"depth":162,"text":8954},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"OpenAI GPT-4o 多模态主力模型，文本 \u002F 图像 \u002F 语音原生统一推理，响应速度业界领先，开发者生态最成熟（Function Calling、Assistants API、Structured Outputs 一线适配），适合通用多模态应用与高吞吐生产部署。",{},"\u002Fmodels\u002Fgpt-4o","Input $2.5\u002FM · Output $10\u002FM · Cached $1.25\u002FM",[917,9096,3699],"coding\u002Fcopilot\u002Fgithub-copilot","2024-05-13",{"title":2845,"description":9091},"models\u002Fgpt-4o",[9101,9102,9103,9104,9105],"多模态原生支持（图片\u002F音频\u002F视频），速度快","性价比高，比 GPT-5 便宜一半","生态最成熟，几乎所有工具都支持","流式响应快，首 token 延迟低","函数调用稳定可靠",[9107,9108,9109,9110],"多模态应用（图片理解、OCR、截图分析）","聊天机器人 \u002F 客服","函数调用 \u002F 工具编排","高吞吐低成本场景","OpenAI",[9113,9114,9115,939],"推理能力不如 GPT-5 \u002F Claude Sonnet 4","编程能力中规中矩，不如 Claude","128K 上下文小于 GPT-5（400K）","uP9prXh48NY26epOwVKpz1lIvpH30LeB6sOD84GAQOU",{"id":9118,"title":1558,"apiCompatible":9119,"benchmarks":9120,"body":9127,"category":1704,"contextWindow":10068,"description":10069,"extension":910,"maxOutput":3692,"meta":10070,"navigation":177,"path":10071,"pricing":10072,"published":915,"relatedTools":10073,"releaseDate":10074,"seo":10075,"slug":10076,"stem":10077,"strengths":10078,"updated":915,"useCases":10084,"vendor":9111,"vendorEn":9111,"weaknesses":10089,"__hash__":10094},"models\u002Fmodels\u002Fgpt-5.md",[2752],[9121,9122,9124,9125,9126],{"name":14,"score":2515},{"name":17,"score":9123},"94.6%",{"name":20,"score":5191},{"name":952,"score":1572},{"name":1580,"score":1586},{"type":23,"value":9128,"toc":10046},[9129,9131,9134,9136,9139,9152,9154,9161,9250,9255,9257,9260,9274,9276,9279,9281,9284,9421,9425,9432,9580,9585,9589,9592,9612,9619,9621,9707,9709,9748,9751,9754,9775,9779,9782,9868,9871,9875,9969,9974,9976,10018,10020,10043],[26,9130,28],{"id":28},[30,9132,9133],{},"GPT-5 是 OpenAI 于 2025 年 8 月发布的旗舰模型。400K token 上下文窗口和 128K 输出窗口使其成为处理超长内容的最佳选择之一。在推理、数学、多模态方面均为顶级水平。",[26,9135,35],{"id":35},[37,9137,9138],{"id":9138},"超长上下文",[30,9140,9141,9142,9144,9145,9148,9149,9151],{},"400K token 的上下文窗口是 ",[861,9143,864],{"href":863},"（200K）的两倍。对于需要分析整个代码仓库、超长文档或多文件对比的场景，GPT-5 是更好的选择。但",[123,9146,9147],{},"实测上下文超过 200K 后质量下降明显","，参考 ",[861,9150,883],{"href":882}," 里的「中间遗忘」现象。",[37,9153,2777],{"id":2777},[30,9155,9156,9157,9160],{},"在 MATH-500 上拿到 98.4%，GPQA Diamond 62.5%，科学推理能力领先。GPT-5 是 OpenAI 把 GPT 主线和 o-series 推理线",[123,9158,9159],{},"合并","后的产物——内置 reasoning effort 控制：",[132,9162,9164],{"className":134,"code":9163,"language":136,"meta":137,"style":137},"from openai import OpenAI\nclient = OpenAI()\n\nresp = client.chat.completions.create(\n    model=\"gpt-5\",\n    reasoning_effort=\"high\",   # low \u002F medium \u002F high\n    messages=[{\"role\": \"user\", \"content\": \"Prove that...\"}],\n)\n",[139,9165,9166,9176,9184,9188,9196,9207,9221,9246],{"__ignoreMap":137},[142,9167,9168,9170,9172,9174],{"class":144,"line":145},[142,9169,149],{"class":148},[142,9171,3061],{"class":152},[142,9173,156],{"class":148},[142,9175,3066],{"class":152},[142,9177,9178,9180,9182],{"class":144,"line":162},[142,9179,165],{"class":152},[142,9181,168],{"class":148},[142,9183,8373],{"class":152},[142,9185,9186],{"class":144,"line":174},[142,9187,178],{"emptyLinePlaceholder":177},[142,9189,9190,9192,9194],{"class":144,"line":181},[142,9191,1007],{"class":152},[142,9193,168],{"class":148},[142,9195,2874],{"class":152},[142,9197,9198,9200,9202,9205],{"class":144,"line":188},[142,9199,201],{"class":200},[142,9201,168],{"class":148},[142,9203,9204],{"class":206},"\"gpt-5\"",[142,9206,210],{"class":152},[142,9208,9209,9212,9214,9216,9218],{"class":144,"line":197},[142,9210,9211],{"class":200},"    reasoning_effort",[142,9213,168],{"class":148},[142,9215,8588],{"class":206},[142,9217,2886],{"class":152},[142,9219,9220],{"class":184},"# low \u002F medium \u002F high\n",[142,9222,9223,9225,9227,9229,9231,9233,9235,9237,9239,9241,9244],{"class":144,"line":5},[142,9224,246],{"class":200},[142,9226,168],{"class":148},[142,9228,251],{"class":152},[142,9230,254],{"class":206},[142,9232,257],{"class":152},[142,9234,260],{"class":206},[142,9236,263],{"class":152},[142,9238,266],{"class":206},[142,9240,257],{"class":152},[142,9242,9243],{"class":206},"\"Prove that...\"",[142,9245,442],{"class":152},[142,9247,9248],{"class":144,"line":230},[142,9249,480],{"class":152},[30,9251,9252,9254],{},[139,9253,8630],{}," 模式下模型会用大量 reasoning token 思考再回答，复杂数学\u002F算法成功率显著上升，但 output token 消费增加 3-5 倍。",[37,9256,2568],{"id":2568},[30,9258,9259],{},"原生支持图片、音频和视频输入。可以：",[568,9261,9262,9265,9268,9271],{},[571,9263,9264],{},"分析 UI 截图并生成前端代码",[571,9266,9267],{},"理解白板照片中的架构图",[571,9269,9270],{},"分析数据图表",[571,9272,9273],{},"转录 + 理解音频内容",[37,9275,1471],{"id":1471},[30,9277,9278],{},"SWE-bench Verified 68.0%，略低于 Claude Sonnet 4 的 72.7%。实际使用中，Cursor 用户反馈 Claude Sonnet 4 在多文件改写和代码审查方面更稳定，但 GPT-5 在算法实现和数学密集型代码上更强。",[26,9280,130],{"id":129},[37,9282,9283],{"id":9283},"基础调用",[132,9285,9287],{"className":134,"code":9286,"language":136,"meta":137,"style":137},"from openai import OpenAI\nclient = OpenAI(api_key=\"sk-...\")\n\nresp = client.chat.completions.create(\n    model=\"gpt-5\",\n    messages=[\n        {\"role\": \"system\", \"content\": \"You are a senior engineer.\"},\n        {\"role\": \"user\", \"content\": \"Refactor this function.\"},\n    ],\n    temperature=1,           # 推理模型保持默认 1\n)\nprint(resp.choices[0].message.content)\n",[139,9288,9289,9299,9316,9320,9328,9338,9346,9367,9388,9392,9406,9410],{"__ignoreMap":137},[142,9290,9291,9293,9295,9297],{"class":144,"line":145},[142,9292,149],{"class":148},[142,9294,3061],{"class":152},[142,9296,156],{"class":148},[142,9298,3066],{"class":152},[142,9300,9301,9303,9305,9308,9310,9312,9314],{"class":144,"line":162},[142,9302,165],{"class":152},[142,9304,168],{"class":148},[142,9306,9307],{"class":152}," OpenAI(",[142,9309,1836],{"class":200},[142,9311,168],{"class":148},[142,9313,3085],{"class":206},[142,9315,480],{"class":152},[142,9317,9318],{"class":144,"line":174},[142,9319,178],{"emptyLinePlaceholder":177},[142,9321,9322,9324,9326],{"class":144,"line":181},[142,9323,1007],{"class":152},[142,9325,168],{"class":148},[142,9327,2874],{"class":152},[142,9329,9330,9332,9334,9336],{"class":144,"line":188},[142,9331,201],{"class":200},[142,9333,168],{"class":148},[142,9335,9204],{"class":206},[142,9337,210],{"class":152},[142,9339,9340,9342,9344],{"class":144,"line":197},[142,9341,246],{"class":200},[142,9343,168],{"class":148},[142,9345,342],{"class":152},[142,9347,9348,9350,9352,9354,9356,9358,9360,9362,9365],{"class":144,"line":5},[142,9349,1292],{"class":152},[142,9351,254],{"class":206},[142,9353,257],{"class":152},[142,9355,3990],{"class":206},[142,9357,263],{"class":152},[142,9359,266],{"class":206},[142,9361,257],{"class":152},[142,9363,9364],{"class":206},"\"You are a senior engineer.\"",[142,9366,1064],{"class":152},[142,9368,9369,9371,9373,9375,9377,9379,9381,9383,9386],{"class":144,"line":230},[142,9370,1292],{"class":152},[142,9372,254],{"class":206},[142,9374,257],{"class":152},[142,9376,260],{"class":206},[142,9378,263],{"class":152},[142,9380,266],{"class":206},[142,9382,257],{"class":152},[142,9384,9385],{"class":206},"\"Refactor this function.\"",[142,9387,1064],{"class":152},[142,9389,9390],{"class":144,"line":243},[142,9391,1321],{"class":152},[142,9393,9394,9396,9398,9401,9403],{"class":144,"line":272},[142,9395,233],{"class":200},[142,9397,168],{"class":148},[142,9399,9400],{"class":220},"1",[142,9402,224],{"class":152},[142,9404,9405],{"class":184},"# 推理模型保持默认 1\n",[142,9407,9408],{"class":144,"line":284},[142,9409,480],{"class":152},[142,9411,9412,9414,9416,9418],{"class":144,"line":299},[142,9413,1330],{"class":220},[142,9415,2930],{"class":152},[142,9417,238],{"class":220},[142,9419,9420],{"class":152},"].message.content)\n",[37,9422,9424],{"id":9423},"structured-outputs强约束-json","Structured Outputs（强约束 JSON）",[30,9426,9427,9428,9431],{},"GPT-5 的 ",[861,9429,9430],{"href":1771},"Structured Outputs"," 是工具调用最稳的：",[132,9433,9435],{"className":134,"code":9434,"language":136,"meta":137,"style":137},"from pydantic import BaseModel\n\nclass CodeReview(BaseModel):\n    severity: str\n    issues: list[str]\n    suggestions: list[str]\n\nresp = client.chat.completions.parse(\n    model=\"gpt-5\",\n    response_format=CodeReview,\n    messages=[{\"role\": \"user\", \"content\": \"Review:\\n\" + code}],\n)\nreview: CodeReview = resp.choices[0].message.parsed\n",[139,9436,9437,9449,9453,9469,9477,9488,9497,9501,9510,9520,9530,9562,9566],{"__ignoreMap":137},[142,9438,9439,9441,9444,9446],{"class":144,"line":145},[142,9440,149],{"class":148},[142,9442,9443],{"class":152}," pydantic ",[142,9445,156],{"class":148},[142,9447,9448],{"class":152}," BaseModel\n",[142,9450,9451],{"class":144,"line":162},[142,9452,178],{"emptyLinePlaceholder":177},[142,9454,9455,9458,9461,9463,9466],{"class":144,"line":174},[142,9456,9457],{"class":148},"class",[142,9459,9460],{"class":4074}," CodeReview",[142,9462,1136],{"class":152},[142,9464,9465],{"class":4074},"BaseModel",[142,9467,9468],{"class":152},"):\n",[142,9470,9471,9474],{"class":144,"line":181},[142,9472,9473],{"class":152},"    severity: ",[142,9475,9476],{"class":220},"str\n",[142,9478,9479,9482,9485],{"class":144,"line":188},[142,9480,9481],{"class":152},"    issues: list[",[142,9483,9484],{"class":220},"str",[142,9486,9487],{"class":152},"]\n",[142,9489,9490,9493,9495],{"class":144,"line":197},[142,9491,9492],{"class":152},"    suggestions: list[",[142,9494,9484],{"class":220},[142,9496,9487],{"class":152},[142,9498,9499],{"class":144,"line":5},[142,9500,178],{"emptyLinePlaceholder":177},[142,9502,9503,9505,9507],{"class":144,"line":230},[142,9504,1007],{"class":152},[142,9506,168],{"class":148},[142,9508,9509],{"class":152}," client.chat.completions.parse(\n",[142,9511,9512,9514,9516,9518],{"class":144,"line":243},[142,9513,201],{"class":200},[142,9515,168],{"class":148},[142,9517,9204],{"class":206},[142,9519,210],{"class":152},[142,9521,9522,9525,9527],{"class":144,"line":272},[142,9523,9524],{"class":200},"    response_format",[142,9526,168],{"class":148},[142,9528,9529],{"class":152},"CodeReview,\n",[142,9531,9532,9534,9536,9538,9540,9542,9544,9546,9548,9550,9553,9555,9557,9559],{"class":144,"line":284},[142,9533,246],{"class":200},[142,9535,168],{"class":148},[142,9537,251],{"class":152},[142,9539,254],{"class":206},[142,9541,257],{"class":152},[142,9543,260],{"class":206},[142,9545,263],{"class":152},[142,9547,266],{"class":206},[142,9549,257],{"class":152},[142,9551,9552],{"class":206},"\"Review:",[142,9554,1310],{"class":220},[142,9556,372],{"class":206},[142,9558,5626],{"class":148},[142,9560,9561],{"class":152}," code}],\n",[142,9563,9564],{"class":144,"line":299},[142,9565,480],{"class":152},[142,9567,9568,9571,9573,9575,9577],{"class":144,"line":471},[142,9569,9570],{"class":152},"review: CodeReview ",[142,9572,168],{"class":148},[142,9574,3198],{"class":152},[142,9576,238],{"class":220},[142,9578,9579],{"class":152},"].message.parsed\n",[30,9581,9582,9584],{},[139,9583,8141],{}," 模式保证 100% 符合 schema，省去后处理校验。",[37,9586,9588],{"id":9587},"prompt-cache自动开启","Prompt Cache（自动开启）",[30,9590,9591],{},"OpenAI 的 cache 是自动触发的——任何 ≥1024 token 的前缀重复出现就自动命中，Cache Read 价格 $0.125\u002FM（-90%）：",[132,9593,9595],{"className":134,"code":9594,"language":136,"meta":137,"style":137},"# 第一次调用：input 完整计费\n# 第二次同样 system prompt 开头：input 前缀自动 cache 命中\n# 无需任何代码改动，OpenAI 后台自动判断\n",[139,9596,9597,9602,9607],{"__ignoreMap":137},[142,9598,9599],{"class":144,"line":145},[142,9600,9601],{"class":184},"# 第一次调用：input 完整计费\n",[142,9603,9604],{"class":144,"line":162},[142,9605,9606],{"class":184},"# 第二次同样 system prompt 开头：input 前缀自动 cache 命中\n",[142,9608,9609],{"class":144,"line":174},[142,9610,9611],{"class":184},"# 无需任何代码改动，OpenAI 后台自动判断\n",[30,9613,9614,9615,9618],{},"要利用好它，",[123,9616,9617],{},"动态内容必须放在 messages 末尾","，不要在 system 里插时间戳。",[26,9620,2222],{"id":2222},[47,9622,9623,9633],{},[50,9624,9625],{},[53,9626,9627,9629,9631],{},[56,9628,2231],{},[56,9630,2234],{},[56,9632,4109],{},[66,9634,9635,9652,9665,9680,9697],{},[53,9636,9637,9641,9644],{},[71,9638,9639],{},[139,9640,1344],{},[71,9642,9643],{},"1（默认）",[71,9645,9646,9647,2649,9650],{},"GPT-5 是推理模型，",[123,9648,9649],{},"不要改 temperature",[861,9651,1349],{"href":1677},[53,9653,9654,9659,9662],{},[71,9655,9656],{},[139,9657,9658],{},"reasoning_effort",[71,9660,9661],{},"low \u002F medium \u002F high",[71,9663,9664],{},"控制内部推理深度，复杂任务 high",[53,9666,9667,9672,9674],{},[71,9668,9669],{},[139,9670,9671],{},"max_completion_tokens",[71,9673,4155],{},[71,9675,9676,9677,9679],{},"GPT-5 用这个而非 ",[139,9678,2279],{},"，老参数被废弃",[53,9681,9682,9687,9690],{},[71,9683,9684],{},[139,9685,9686],{},"seed",[71,9688,9689],{},"固定值",[71,9691,9692,9693,9696],{},"best-effort 复现，配合 ",[139,9694,9695],{},"system_fingerprint"," 验证",[53,9698,9699,9703,9705],{},[71,9700,9701],{},[139,9702,4140],{},[71,9704,9643],{},[71,9706,7668],{},[26,9708,492],{"id":492},[47,9710,9711,9719],{},[50,9712,9713],{},[53,9714,9715,9717],{},[56,9716,501],{},[56,9718,1362],{},[66,9720,9721,9727,9733,9740],{},[53,9722,9723,9725],{},[71,9724,515],{},[71,9726,8797],{},[53,9728,9729,9731],{},[71,9730,529],{},[71,9732,8788],{},[53,9734,9735,9737],{},[71,9736,6890],{},[71,9738,9739],{},"$0.125 \u002F 百万 token",[53,9741,9742,9745],{},[71,9743,9744],{},"Batch API（24h）",[71,9746,9747],{},"-50%",[30,9749,9750],{},"GPT-5 的 Input 价格仅为 Claude Sonnet 4 的 42%，Output 价格低 33%。对于高吞吐场景（批量处理、大量 API 调用），GPT-5 的成本优势明显。",[37,9752,9753],{"id":9753},"实际账单注意",[568,9755,9756,9770],{},[571,9757,9758,9761,9762,9765,9766,9769],{},[123,9759,9760],{},"reasoning token"," 算 output：开 ",[139,9763,9764],{},"reasoning_effort=high"," 时单条对话 output 可能是普通模式的 3-5 倍。账单会单独显示 ",[139,9767,9768],{},"reasoning_tokens"," 字段。",[571,9771,9772,9774],{},[123,9773,5760],{},"：异步批量请求 24 小时内出结果，所有价格 -50%，非常适合离线数据处理。",[26,9776,9778],{"id":9777},"限流tier-体系","限流（Tier 体系）",[30,9780,9781],{},"OpenAI 的 rate limit 按账户消费分 5 个 Tier：",[47,9783,9784,9800],{},[50,9785,9786],{},[53,9787,9788,9791,9794,9797],{},[56,9789,9790],{},"Tier",[56,9792,9793],{},"月消费门槛",[56,9795,9796],{},"RPM",[56,9798,9799],{},"TPM",[66,9801,9802,9815,9829,9842,9855],{},[53,9803,9804,9807,9810,9812],{},[71,9805,9806],{},"Tier 1",[71,9808,9809],{},"$5",[71,9811,221],{},[71,9813,9814],{},"30K",[53,9816,9817,9820,9823,9826],{},[71,9818,9819],{},"Tier 2",[71,9821,9822],{},"$50",[71,9824,9825],{},"5,000",[71,9827,9828],{},"450K",[53,9830,9831,9834,9837,9839],{},[71,9832,9833],{},"Tier 3",[71,9835,9836],{},"$100",[71,9838,9825],{},[71,9840,9841],{},"800K",[53,9843,9844,9847,9850,9853],{},[71,9845,9846],{},"Tier 4",[71,9848,9849],{},"$250",[71,9851,9852],{},"10,000",[71,9854,2398],{},[53,9856,9857,9860,9863,9865],{},[71,9858,9859],{},"Tier 5",[71,9861,9862],{},"$1,000+",[71,9864,9852],{},[71,9866,9867],{},"30M",[30,9869,9870],{},"生产环境跑量前先把 Tier 提到 3+，否则会频繁 429。Token 限流（TPM）比请求限流（RPM）更容易先打到，长 prompt 场景尤其。",[26,9872,9874],{"id":9873},"与-claude-sonnet-4-怎么选","与 Claude Sonnet 4 怎么选",[47,9876,9877,9887],{},[50,9878,9879],{},[53,9880,9881,9883,9885],{},[56,9882,1451],{},[56,9884,1558],{},[56,9886,864],{},[66,9888,9889,9898,9907,9916,9926,9935,9943,9952,9961],{},[53,9890,9891,9894,9896],{},[71,9892,9893],{},"编程（Cursor\u002FClaude Code）",[71,9895,692],{},[71,9897,644],{},[53,9899,9900,9903,9905],{},[71,9901,9902],{},"推理\u002F数学",[71,9904,644],{},[71,9906,692],{},[53,9908,9909,9912,9914],{},[71,9910,9911],{},"上下文长度",[71,9913,2409],{},[71,9915,2529],{},[53,9917,9918,9920,9923],{},[71,9919,2568],{},[71,9921,9922],{},"原生（图+音+视频）",[71,9924,9925],{},"仅图片",[53,9927,9928,9931,9933],{},[71,9929,9930],{},"工具调用 \u002F Structured Outputs",[71,9932,644],{},[71,9934,644],{},[53,9936,9937,9939,9941],{},[71,9938,2584],{},[71,9940,2589],{},[71,9942,521],{},[53,9944,9945,9948,9950],{},[71,9946,9947],{},"Output 价格",[71,9949,6881],{},[71,9951,535],{},[53,9953,9954,9957,9959],{},[71,9955,9956],{},"Agent 工具调用稳定性",[71,9958,692],{},[71,9960,644],{},[53,9962,9963,9965,9967],{},[71,9964,7095],{},[71,9966,2601],{},[71,9968,2601],{},[30,9970,9971,9973],{},[123,9972,3464],{},"：主力编程用 Claude Sonnet 4，需要超长上下文 \u002F 多模态 \u002F 复杂推理时切 GPT-5。混搭最香。",[26,9975,825],{"id":825},[568,9977,9978,9989,9997,10003,10012],{},[571,9979,9980,9985,9986,9988],{},[123,9981,9982,9983],{},"不要再用 ",[139,9984,2279],{},"：GPT-5 上是 ",[139,9987,9671],{},"。老代码迁过来会报错或行为异常。",[571,9990,9991,9994,9995,126],{},[123,9992,9993],{},"不要把 temperature 设 0","：推理模型设 0 反而降质量，详见 ",[861,9996,1349],{"href":1677},[571,9998,9999,10002],{},[123,10000,10001],{},"reasoning_effort 不要默认 high","：贵且慢。默认 medium，遇到难题再升 high。",[571,10004,10005,10008,10009,10011],{},[123,10006,10007],{},"400K 不要塞满","：上下文超过 200K 后召回精度下降明显，配合 ",[861,10010,9029],{"href":9028}," 或检索式压缩更靠谱。",[571,10013,10014,10017],{},[123,10015,10016],{},"多模态 input 也算 token","：一张 1024×1024 图片约消耗 ~1000 input token，批量处理要算账。",[26,10019,854],{"id":854},[568,10021,10022,10029,10033,10038],{},[571,10023,10024,10025,865,10027],{},"对比同档：",[861,10026,864],{"href":863},[861,10028,1561],{"href":5445},[571,10030,2673,10031],{},[861,10032,2676],{"href":1771},[571,10034,10035,10036],{},"长上下文：",[861,10037,883],{"href":882},[571,10039,10040,10041],{},"推理参数：",[861,10042,1349],{"href":1677},[885,10044,10045],{},"html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}",{"title":137,"searchDepth":174,"depth":174,"links":10047},[10048,10049,10055,10060,10061,10064,10065,10066,10067],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":10050},[10051,10052,10053,10054],{"id":9138,"depth":174,"text":9138},{"id":2777,"depth":174,"text":2777},{"id":2568,"depth":174,"text":2568},{"id":1471,"depth":174,"text":1471},{"id":129,"depth":162,"text":130,"children":10056},[10057,10058,10059],{"id":9283,"depth":174,"text":9283},{"id":9423,"depth":174,"text":9424},{"id":9587,"depth":174,"text":9588},{"id":2222,"depth":162,"text":2222},{"id":492,"depth":162,"text":492,"children":10062},[10063],{"id":9753,"depth":174,"text":9753},{"id":9777,"depth":162,"text":9778},{"id":9873,"depth":162,"text":9874},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},400000,"OpenAI 2025 年旗舰模型，400K 上下文 + 128K 输出 + 多模态，推理能力顶级，API 价格低于 Claude Sonnet 4。",{},"\u002Fmodels\u002Fgpt-5","Input $1.25\u002FM · Output $10\u002FM · Cached $0.125\u002FM",[917,9096,3699],"2025-08-07",{"title":1558,"description":10069},"gpt-5","models\u002Fgpt-5",[10079,10080,10081,10082,10083],"400K 超长上下文，全项目代码分析无压力","128K 输出窗口，长文件一次生成","多模态原生支持（图片、音频、视频）","推理能力顶级，数学\u002F科学\u002F代码全面","API 价格比 Sonnet 4 更低",[10085,10086,10087,10088],"通用推理与数学","多模态分析（图片\u002F截图理解）","长文档\u002F全项目代码分析","ChatGPT \u002F API 对话",[10090,10091,10092,10093],"国内无官方 API，需走中转","编程实操中不如 Claude Sonnet 4 稳定（Cursor 用户反馈）","延迟略高，流式首 token 慢于 Sonnet 4","知识截止较早","q83eVVwHgUNMitKxvtSqyVwIqO3IweXrUG_jjAJbzFg",{"id":10096,"title":4618,"apiCompatible":10097,"benchmarks":10098,"body":10105,"category":907,"contextWindow":3692,"description":10943,"extension":910,"maxOutput":4525,"meta":10944,"navigation":177,"path":10945,"pricing":10946,"published":915,"relatedTools":10947,"releaseDate":10948,"seo":10949,"slug":10950,"stem":10951,"strengths":10952,"updated":915,"useCases":10957,"vendor":10962,"vendorEn":10963,"weaknesses":10964,"__hash__":10969},"models\u002Fmodels\u002Fkimi-k2.md",[2752],[10099,10101,10103],{"name":17,"score":10100},"82.6%",{"name":20,"score":10102},"82.1%",{"name":3733,"score":10104},"89.4%",{"type":23,"value":10106,"toc":10925},[10107,10109,10112,10114,10117,10120,10134,10137,10140,10149,10194,10205,10207,10210,10224,10226,10229,10365,10369,10372,10529,10532,10535,10598,10604,10606,10649,10662,10665,10668,10682,10685,10689,10798,10802,10816,10818,10820,10840,10842,10856,10858,10898,10900,10923],[26,10108,28],{"id":28},[30,10110,10111],{},"Kimi K2 是月之暗面（Moonshot AI）于 2025 年 10 月发布的旗舰模型。Kimi 以超长文本处理能力起家——网页版长期以\"200 万字上下文\"为卖点出圈，K2 继续强化这一优势，在中文长文档分析场景中表现突出。",[26,10113,35],{"id":35},[37,10115,10116],{"id":10116},"超长文本处理",[30,10118,10119],{},"Kimi 的传统强项。可以处理：",[568,10121,10122,10125,10128,10131],{},[571,10123,10124],{},"200 万字中文长文（约 300 页）",[571,10126,10127],{},"整本学术著作",[571,10129,10130],{},"完整的法律卷宗",[571,10132,10133],{},"多份财报对比分析",[30,10135,10136],{},"在长文本理解的准确性和连贯性上，K2 在国产模型中名列前茅。月之暗面在长文场景的工程优化（chunking strategy、attention pattern）做得很扎实，比单纯堆上下文窗口的厂商质量更稳。",[37,10138,10139],{"id":10139},"联网搜索",[30,10141,10142,10143,10146,10147,2617],{},"Kimi 网页版",[123,10144,10145],{},"默认带联网搜索","——回答问题时会先搜索最新信息，然后基于搜索结果生成回答。这解决了大模型知识截止的问题，相当于内置 ",[861,10148,9029],{"href":9028},[47,10150,10151,10160],{},[50,10152,10153],{},[53,10154,10155,10158],{},[56,10156,10157],{},"流程",[56,10159,4109],{},[66,10161,10162,10170,10178,10186],{},[53,10163,10164,10167],{},[71,10165,10166],{},"1. 用户提问",[71,10168,10169],{},"\"今天上海股市怎么样\"",[53,10171,10172,10175],{},[71,10173,10174],{},"2. Kimi 触发搜索",[71,10176,10177],{},"调用搜索引擎获取最新新闻",[53,10179,10180,10183],{},[71,10181,10182],{},"3. 抽取关键段落",[71,10184,10185],{},"选 Top-K 段落作为上下文",[53,10187,10188,10191],{},[71,10189,10190],{},"4. 生成回答",[71,10192,10193],{},"基于搜索结果回答 + 标注来源",[30,10195,10196,10197,10200,10201,10204],{},"API 端通过 ",[139,10198,10199],{},"tools"," 字段开启 ",[139,10202,10203],{},"$web_search"," 工具调用即可。",[37,10206,7268],{"id":7268},[30,10208,10209],{},"CMMLU 89.4%，中文场景下的理解力和表达力优秀。特别适合：",[568,10211,10212,10215,10218,10221],{},[571,10213,10214],{},"中文合同条款分析",[571,10216,10217],{},"中文学术论文阅读",[571,10219,10220],{},"中文法律文书理解",[571,10222,10223],{},"古文 \u002F 文学作品分析",[26,10225,130],{"id":129},[30,10227,10228],{},"Kimi 提供 OpenAI 兼容 API：",[132,10230,10232],{"className":134,"code":10231,"language":136,"meta":137,"style":137},"from openai import OpenAI\n\nclient = OpenAI(\n    api_key=\"sk-...\",\n    base_url=\"https:\u002F\u002Fapi.moonshot.cn\u002Fv1\",\n)\n\nresp = client.chat.completions.create(\n    model=\"moonshot-v1-128k\",     # K2 旗舰\n    temperature=0.3,\n    messages=[\n        {\"role\": \"user\", \"content\": LONG_CONTRACT + \"\\n\\n找出风险点\"}\n    ],\n)\n",[139,10233,10234,10244,10248,10256,10266,10277,10281,10285,10293,10307,10317,10325,10357,10361],{"__ignoreMap":137},[142,10235,10236,10238,10240,10242],{"class":144,"line":145},[142,10237,149],{"class":148},[142,10239,3061],{"class":152},[142,10241,156],{"class":148},[142,10243,3066],{"class":152},[142,10245,10246],{"class":144,"line":162},[142,10247,178],{"emptyLinePlaceholder":177},[142,10249,10250,10252,10254],{"class":144,"line":174},[142,10251,165],{"class":152},[142,10253,168],{"class":148},[142,10255,3075],{"class":152},[142,10257,10258,10260,10262,10264],{"class":144,"line":181},[142,10259,3080],{"class":200},[142,10261,168],{"class":148},[142,10263,3085],{"class":206},[142,10265,210],{"class":152},[142,10267,10268,10270,10272,10275],{"class":144,"line":188},[142,10269,3092],{"class":200},[142,10271,168],{"class":148},[142,10273,10274],{"class":206},"\"https:\u002F\u002Fapi.moonshot.cn\u002Fv1\"",[142,10276,210],{"class":152},[142,10278,10279],{"class":144,"line":197},[142,10280,480],{"class":152},[142,10282,10283],{"class":144,"line":5},[142,10284,178],{"emptyLinePlaceholder":177},[142,10286,10287,10289,10291],{"class":144,"line":230},[142,10288,1007],{"class":152},[142,10290,168],{"class":148},[142,10292,2874],{"class":152},[142,10294,10295,10297,10299,10302,10304],{"class":144,"line":243},[142,10296,201],{"class":200},[142,10298,168],{"class":148},[142,10300,10301],{"class":206},"\"moonshot-v1-128k\"",[142,10303,4856],{"class":152},[142,10305,10306],{"class":184},"# K2 旗舰\n",[142,10308,10309,10311,10313,10315],{"class":144,"line":272},[142,10310,233],{"class":200},[142,10312,168],{"class":148},[142,10314,4868],{"class":220},[142,10316,210],{"class":152},[142,10318,10319,10321,10323],{"class":144,"line":284},[142,10320,246],{"class":200},[142,10322,168],{"class":148},[142,10324,342],{"class":152},[142,10326,10327,10329,10331,10333,10335,10337,10339,10341,10344,10346,10349,10352,10355],{"class":144,"line":299},[142,10328,1292],{"class":152},[142,10330,254],{"class":206},[142,10332,257],{"class":152},[142,10334,260],{"class":206},[142,10336,263],{"class":152},[142,10338,266],{"class":206},[142,10340,257],{"class":152},[142,10342,10343],{"class":220},"LONG_CONTRACT",[142,10345,5626],{"class":148},[142,10347,10348],{"class":206}," \"",[142,10350,10351],{"class":220},"\\n\\n",[142,10353,10354],{"class":206},"找出风险点\"",[142,10356,1316],{"class":152},[142,10358,10359],{"class":144,"line":471},[142,10360,1321],{"class":152},[142,10362,10363],{"class":144,"line":477},[142,10364,480],{"class":152},[37,10366,10368],{"id":10367},"文件直传独有特性","文件直传（独有特性）",[30,10370,10371],{},"Kimi 提供文件上传接口，可以把整份 PDF \u002F Word \u002F 长文档直接上传，无需自己做 OCR \u002F 解析：",[132,10373,10375],{"className":134,"code":10374,"language":136,"meta":137,"style":137},"file = client.files.create(\n    file=open(\"contract.pdf\", \"rb\"),\n    purpose=\"file-extract\",\n)\ncontent = client.files.content(file_id=file.id).text   # 返回提取后的文本\n\nresp = client.chat.completions.create(\n    model=\"moonshot-v1-128k\",\n    messages=[\n        {\"role\": \"system\", \"content\": content},        # 文件内容作为 system\n        {\"role\": \"user\", \"content\": \"总结要点\"},\n    ],\n)\n",[139,10376,10377,10388,10411,10423,10427,10450,10454,10462,10472,10480,10500,10521,10525],{"__ignoreMap":137},[142,10378,10379,10382,10385],{"class":144,"line":145},[142,10380,10381],{"class":200},"file",[142,10383,10384],{"class":148}," =",[142,10386,10387],{"class":152}," client.files.create(\n",[142,10389,10390,10393,10395,10398,10400,10403,10405,10408],{"class":144,"line":162},[142,10391,10392],{"class":200},"    file",[142,10394,168],{"class":148},[142,10396,10397],{"class":220},"open",[142,10399,1136],{"class":152},[142,10401,10402],{"class":206},"\"contract.pdf\"",[142,10404,263],{"class":152},[142,10406,10407],{"class":206},"\"rb\"",[142,10409,10410],{"class":152},"),\n",[142,10412,10413,10416,10418,10421],{"class":144,"line":174},[142,10414,10415],{"class":200},"    purpose",[142,10417,168],{"class":148},[142,10419,10420],{"class":206},"\"file-extract\"",[142,10422,210],{"class":152},[142,10424,10425],{"class":144,"line":181},[142,10426,480],{"class":152},[142,10428,10429,10432,10434,10437,10440,10442,10444,10447],{"class":144,"line":188},[142,10430,10431],{"class":152},"content ",[142,10433,168],{"class":148},[142,10435,10436],{"class":152}," client.files.content(",[142,10438,10439],{"class":200},"file_id",[142,10441,168],{"class":148},[142,10443,10381],{"class":200},[142,10445,10446],{"class":152},".id).text   ",[142,10448,10449],{"class":184},"# 返回提取后的文本\n",[142,10451,10452],{"class":144,"line":197},[142,10453,178],{"emptyLinePlaceholder":177},[142,10455,10456,10458,10460],{"class":144,"line":5},[142,10457,1007],{"class":152},[142,10459,168],{"class":148},[142,10461,2874],{"class":152},[142,10463,10464,10466,10468,10470],{"class":144,"line":230},[142,10465,201],{"class":200},[142,10467,168],{"class":148},[142,10469,10301],{"class":206},[142,10471,210],{"class":152},[142,10473,10474,10476,10478],{"class":144,"line":243},[142,10475,246],{"class":200},[142,10477,168],{"class":148},[142,10479,342],{"class":152},[142,10481,10482,10484,10486,10488,10490,10492,10494,10497],{"class":144,"line":272},[142,10483,1292],{"class":152},[142,10485,254],{"class":206},[142,10487,257],{"class":152},[142,10489,3990],{"class":206},[142,10491,263],{"class":152},[142,10493,266],{"class":206},[142,10495,10496],{"class":152},": content},        ",[142,10498,10499],{"class":184},"# 文件内容作为 system\n",[142,10501,10502,10504,10506,10508,10510,10512,10514,10516,10519],{"class":144,"line":284},[142,10503,1292],{"class":152},[142,10505,254],{"class":206},[142,10507,257],{"class":152},[142,10509,260],{"class":206},[142,10511,263],{"class":152},[142,10513,266],{"class":206},[142,10515,257],{"class":152},[142,10517,10518],{"class":206},"\"总结要点\"",[142,10520,1064],{"class":152},[142,10522,10523],{"class":144,"line":299},[142,10524,1321],{"class":152},[142,10526,10527],{"class":144,"line":471},[142,10528,480],{"class":152},[30,10530,10531],{},"这是国内模型里少数提供\"上传文件直接对话\"的，对法律 \u002F 学术场景非常实用。",[37,10533,10534],{"id":10534},"模型变种",[47,10536,10537,10547],{},[50,10538,10539],{},[53,10540,10541,10543,10545],{},[56,10542,619],{},[56,10544,2526],{},[56,10546,1362],{},[66,10548,10549,10562,10574,10586],{},[53,10550,10551,10556,10559],{},[71,10552,10553],{},[139,10554,10555],{},"moonshot-v1-8k",[71,10557,10558],{},"8K",[71,10560,10561],{},"¥12\u002FM Input",[53,10563,10564,10569,10571],{},[71,10565,10566],{},[139,10567,10568],{},"moonshot-v1-32k",[71,10570,1502],{},[71,10572,10573],{},"¥24\u002FM Input",[53,10575,10576,10581,10583],{},[71,10577,10578],{},[139,10579,10580],{},"moonshot-v1-128k",[71,10582,2537],{},[71,10584,10585],{},"¥60\u002FM Input",[53,10587,10588,10593,10595],{},[71,10589,10590],{},[139,10591,10592],{},"kimi-k2-0905-preview",[71,10594,4601],{},[71,10596,10597],{},"¥4\u002FM Input",[30,10599,10600,10601,10603],{},"新版 K2 系列降价显著，是性价比首选。",[139,10602,10580],{}," 老模型贵但稳定。",[26,10605,492],{"id":492},[47,10607,10608,10616],{},[50,10609,10610],{},[53,10611,10612,10614],{},[56,10613,501],{},[56,10615,1362],{},[66,10617,10618,10625,10633,10641],{},[53,10619,10620,10623],{},[71,10621,10622],{},"K2 Input",[71,10624,3364],{},[53,10626,10627,10630],{},[71,10628,10629],{},"K2 Output",[71,10631,10632],{},"¥12 \u002F 百万 token",[53,10634,10635,10638],{},[71,10636,10637],{},"文件解析",[71,10639,10640],{},"按文件大小计费",[53,10642,10643,10646],{},[71,10644,10645],{},"网页版",[71,10647,10648],{},"免费（带速率限制）",[30,10650,10651,10652,10654,10655,10657,10658,10661],{},"API 价格高于 ",[861,10653,2504],{"href":4486},"（¥2\u002FM）和 ",[861,10656,680],{"href":3653},"（¥1\u002FM），但",[123,10659,10660],{},"网页版完全免费","是一大杀手锏——团队成员可以直接在 kimi.com 上传文档对话，零成本。",[26,10663,10664],{"id":10664},"网页版的隐形优势",[30,10666,10667],{},"Kimi 网页版的免费策略让它成为很多团队的\"首选试水模型\"：",[568,10669,10670,10673,10676,10679],{},[571,10671,10672],{},"产品经理直接拖文档分析需求",[571,10674,10675],{},"法务直接上传合同找风险",[571,10677,10678],{},"学生写论文做综述",[571,10680,10681],{},"程序员临时分析一份 log",[30,10683,10684],{},"很多企业最初接触 AI 就是从 Kimi 开始。但要进生产环境就要切到付费 API。",[26,10686,10688],{"id":10687},"kimi-vs-其他国产模型","Kimi vs 其他国产模型",[47,10690,10691,10705],{},[50,10692,10693],{},[53,10694,10695,10697,10699,10701,10703],{},[56,10696,1451],{},[56,10698,4618],{},[56,10700,2504],{},[56,10702,680],{},[56,10704,4347],{},[66,10706,10707,10720,10732,10744,10758,10771,10784],{},[53,10708,10709,10712,10714,10716,10718],{},[71,10710,10711],{},"长文本",[71,10713,644],{},[71,10715,1601],{},[71,10717,1601],{},[71,10719,1601],{},[53,10721,10722,10724,10726,10728,10730],{},[71,10723,1471],{},[71,10725,1601],{},[71,10727,692],{},[71,10729,692],{},[71,10731,1601],{},[53,10733,10734,10736,10738,10740,10742],{},[71,10735,7268],{},[71,10737,644],{},[71,10739,644],{},[71,10741,692],{},[71,10743,692],{},[53,10745,10746,10748,10751,10754,10756],{},[71,10747,10139],{},[71,10749,10750],{},"✅ 内置",[71,10752,10753],{},"✅ 工具",[71,10755,2601],{},[71,10757,1522],{},[53,10759,10760,10763,10765,10767,10769],{},[71,10761,10762],{},"文件直传",[71,10764,1522],{},[71,10766,2601],{},[71,10768,2601],{},[71,10770,2601],{},[53,10772,10773,10776,10778,10780,10782],{},[71,10774,10775],{},"网页版免费",[71,10777,1522],{},[71,10779,1522],{},[71,10781,1522],{},[71,10783,1522],{},[53,10785,10786,10789,10792,10794,10796],{},[71,10787,10788],{},"API 价格",[71,10790,10791],{},"¥4\u002F¥12",[71,10793,4359],{},[71,10795,4356],{},[71,10797,4362],{},[30,10799,10800,2617],{},[123,10801,3464],{},[568,10803,10804,10807,10810,10813],{},[571,10805,10806],{},"长文档分析 \u002F 文件直传 → Kimi K2",[571,10808,10809],{},"编程 → GLM-5.2 \u002F DeepSeek-V3",[571,10811,10812],{},"低成本批量 → DeepSeek-V3 \u002F Qwen 3",[571,10814,10815],{},"多语言 → Qwen 3",[26,10817,6036],{"id":6035},[30,10819,788],{},[568,10821,10822,10825,10828,10831,10834,10837],{},[571,10823,10824],{},"法律文书 \u002F 合同分析",[571,10826,10827],{},"学术论文阅读 \u002F 综述",[571,10829,10830],{},"财报 \u002F 招股书分析",[571,10832,10833],{},"多文档对比 \u002F 综合",[571,10835,10836],{},"联网搜索 + 实时问答",[571,10838,10839],{},"文件直接上传场景",[30,10841,808],{},[568,10843,10844,10847,10850,10853],{},[571,10845,10846],{},"AI 编程主力（HumanEval \u002F SWE-bench 不行）",[571,10848,10849],{},"Agent 复杂工具调用",[571,10851,10852],{},"实时聊天（速度一般）",[571,10854,10855],{},"极致低成本场景（V3 \u002F Qwen 更便宜）",[26,10857,825],{"id":825},[568,10859,10860,10866,10877,10886,10892],{},[571,10861,10862,10865],{},[123,10863,10864],{},"8K 输出限制","：长文本输入虽然支持 128K-256K，但 output 仍 8K。长报告生成要分段。",[571,10867,10868,10873,10874,126],{},[123,10869,10870,10872],{},[139,10871,10580],{}," 价格高","：60\u002F180 价格远超新版 K2（4\u002F12）。务必用新版 ",[139,10875,10876],{},"kimi-k2-*-preview",[571,10878,10879,10882,10883,10885],{},[123,10880,10881],{},"联网搜索单独计费","：每次调用 ",[139,10884,10203],{}," 工具计费，批量任务要关注账单。",[571,10887,10888,10891],{},[123,10889,10890],{},"文件解析有大小上限","：单文件 100MB，超长卷宗需要分块上传。",[571,10893,10894,10897],{},[123,10895,10896],{},"没有 batch API","：大批量离线场景不如 DeepSeek-V3。",[26,10899,854],{"id":854},[568,10901,10902,10907,10913,10918],{},[571,10903,10904,10905],{},"长文本相关：",[861,10906,883],{"href":882},[571,10908,4483,10909,865,10911],{},[861,10910,2504],{"href":4486},[861,10912,680],{"href":3653},[571,10914,10915,10916],{},"联网搜索原理：",[861,10917,9029],{"href":9028},[571,10919,10920,10921],{},"检索增强：",[861,10922,5371],{"href":5370},[885,10924,5378],{},{"title":137,"searchDepth":174,"depth":174,"links":10926},[10927,10928,10933,10937,10938,10939,10940,10941,10942],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":10929},[10930,10931,10932],{"id":10116,"depth":174,"text":10116},{"id":10139,"depth":174,"text":10139},{"id":7268,"depth":174,"text":7268},{"id":129,"depth":162,"text":130,"children":10934},[10935,10936],{"id":10367,"depth":174,"text":10368},{"id":10534,"depth":174,"text":10534},{"id":492,"depth":162,"text":492},{"id":10664,"depth":162,"text":10664},{"id":10687,"depth":162,"text":10688},{"id":6035,"depth":162,"text":6036},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"月之暗面 Kimi K2 超长文本旗舰模型，单次可吞下数百万字上下文，中文长文档分析、合同 \u002F 论文 \u002F 财报理解能力国产最强之一，网页版完全免费可用，API 提供慷慨免费额度，长文场景首选。",{},"\u002Fmodels\u002Fkimi-k2","Input ¥4\u002FM · Output ¥12\u002FM（免费 API 额度）",[3699],"2025-10-15",{"title":4618,"description":10943},"kimi-k2","models\u002Fkimi-k2",[10953,10954,3709,10955,10956],"超长文本处理能力国产最强之一","中文阅读理解优秀，适合文档分析","Kimi 网页版免费使用，无门槛","支持联网搜索，回答有最新信息",[10958,10959,10960,10961],"超长文档分析（合同\u002F论文\u002F财报）","中文阅读理解与摘要","联网搜索问答","学术研究辅助","月之暗面","Moonshot AI",[10965,10966,10967,10968],"编程能力不如 Claude\u002FGLM-5.2","8K 输出窗口偏短","Agent\u002F工具调用能力一般","API 价格高于 GLM-5.2","Gnl8zj20R5iquXAmD8YupcUqSTJgGJ80cyM4NI0vlKg",{"id":10971,"title":10972,"apiCompatible":10973,"benchmarks":10974,"body":10980,"category":907,"contextWindow":5397,"description":11937,"extension":910,"maxOutput":911,"meta":11938,"navigation":177,"path":11939,"pricing":11940,"published":915,"relatedTools":11941,"releaseDate":11943,"seo":11944,"slug":11945,"stem":11946,"strengths":11947,"updated":915,"useCases":11953,"vendor":11958,"vendorEn":11958,"weaknesses":11959,"__hash__":11964},"models\u002Fmodels\u002Fllama-4.md","Llama 4",[2752],[10975,10977,10978],{"name":14,"score":10976},"52.3%",{"name":17,"score":10102},{"name":20,"score":10979},"80.2%",{"type":23,"value":10981,"toc":11914},[10982,10984,10991,10994,10996,10999,11002,11029,11032,11035,11123,11127,11137,11139,11142,11174,11177,11181,11184,11223,11227,11284,11287,11291,11319,11321,11324,11470,11472,11475,11535,11539,11542,11558,11561,11564,11629,11639,11641,11755,11759,11779,11781,11783,11808,11810,11824,11826,11879,11881,11911],[26,10983,28],{"id":28},[30,10985,10986,10987,10990],{},"Llama 4 是 Meta 于 2025 年 7 月发布的开源旗舰模型系列。最大价值是",[123,10988,10989],{},"完全免费可商用","——没有 API 费用，只有 GPU 成本。社区生态最强，工具链最完善。",[30,10992,10993],{},"Llama 系列在开源大模型中处于 \"事实标准\" 地位——Hugging Face 上绝大多数微调模型、量化版本、RAG 教程都基于 Llama。即便实际效果未必是同档最好，\"会用 Llama\" 是企业 AI 团队的基本素质。",[26,10995,35],{"id":35},[37,10997,10998],{"id":10998},"完全开源",[30,11000,11001],{},"Llama 4 采用 Meta 自有的开源协议（Llama Community License），允许商用。这意味着：",[568,11003,11004,11010,11016,11019,11022],{},[571,11005,11006,11007],{},"企业可以自由部署，",[123,11008,11009],{},"数据不出内网",[571,11011,11012,11013,11015],{},"可以对模型进行微调（",[861,11014,3592],{"href":3591}," \u002F 全量 SFT 都行）",[571,11017,11018],{},"可以集成到自己的产品中并商业化",[571,11020,11021],{},"无 API 调用费用，无 token 计费",[571,11023,11024,11025,11028],{},"输出内容",[123,11026,11027],{},"不受任何厂商内容审查策略","约束",[30,11030,11031],{},"注意\"完全开源\"有边界：月活 > 7 亿的产品需要额外申请商用授权（基本只针对 Meta 的直接竞品）；普通企业用免费即可。",[37,11033,11034],{"id":11034},"多尺寸版本",[47,11036,11037,11051],{},[50,11038,11039],{},[53,11040,11041,11044,11047,11049],{},[56,11042,11043],{},"版本",[56,11045,11046],{},"参数量",[56,11048,3006],{},[56,11050,2458],{},[66,11052,11053,11067,11081,11095,11109],{},[53,11054,11055,11058,11061,11064],{},[71,11056,11057],{},"Llama 4 8B",[71,11059,11060],{},"8B",[71,11062,11063],{},"8-16GB",[71,11065,11066],{},"个人本地、IoT、浏览器 WASM",[53,11068,11069,11072,11075,11078],{},[71,11070,11071],{},"Llama 4 70B",[71,11073,11074],{},"70B",[71,11076,11077],{},"48-128GB",[71,11079,11080],{},"中小企业部署",[53,11082,11083,11086,11089,11092],{},[71,11084,11085],{},"Llama 4 405B",[71,11087,11088],{},"405B",[71,11090,11091],{},"多卡 H100（>800GB）",[71,11093,11094],{},"云端高性能",[53,11096,11097,11100,11103,11106],{},[71,11098,11099],{},"Llama 4 Maverick",[71,11101,11102],{},"17B 激活 \u002F 400B 总 MoE",[71,11104,11105],{},"8×H100",[71,11107,11108],{},"平衡型",[53,11110,11111,11114,11117,11120],{},[71,11112,11113],{},"Llama 4 Scout",[71,11115,11116],{},"17B 激活 \u002F 109B 总 MoE",[71,11118,11119],{},"4×H100",[71,11121,11122],{},"10M 超长上下文（实验性）",[37,11124,11126],{"id":11125},"scout-的-10m-上下文","Scout 的 10M 上下文",[30,11128,11129,11130,11133,11134,11136],{},"Llama 4 Scout 号称支持 10M token 上下文——",[123,11131,11132],{},"业界最长","，远超 ",[861,11135,1561],{"href":5445},"（1M）。但 Meta 自己也承认这是\"实验性\"，超过 1M 后质量下降明显，长文 benchmark 上不如 Gemini Pro。当前最实用范围在 256K-512K。",[37,11138,5137],{"id":5137},[30,11140,11141],{},"Llama 的社区生态是所有开源模型中最强的：",[568,11143,11144,11153,11156,11159,11165,11168,11171],{},[571,11145,11146,865,11148,11152],{},[861,11147,3865],{"href":3864},[861,11149,11151],{"href":11150},"\u002Fcoding\u002Flocal\u002Flm-studio.html","LM Studio"," 一键本地部署",[571,11154,11155],{},"Hugging Face 上有数万微调版本（医疗、法律、编程、角色扮演各领域）",[571,11157,11158],{},"vLLM \u002F TGI \u002F SGLang 高性能推理框架",[571,11160,11161,11162,11164],{},"LangChain \u002F LlamaIndex \u002F ",[861,11163,4316],{"href":4315}," 原生支持",[571,11166,11167],{},"大量量化版本（GGUF \u002F AWQ \u002F GPTQ \u002F EXL2 \u002F MLX）",[571,11169,11170],{},"Apple Silicon 上跑 MLX 版本，M2\u002FM3 Max 跑 70B 完全可行",[571,11172,11173],{},"各种 fine-tune 框架（Unsloth \u002F Axolotl \u002F torchtune）默认支持",[26,11175,11176],{"id":11176},"部署方式",[37,11178,11180],{"id":11179},"本地个人开发者","本地（个人开发者）",[30,11182,11183],{},"最简单：",[132,11185,11187],{"className":4040,"code":11186,"language":4042,"meta":137,"style":137},"# Ollama 一键\nollama pull llama4:8b\nollama run llama4:8b\n\n# 或者通过 LM Studio 图形界面\n",[139,11188,11189,11194,11205,11214,11218],{"__ignoreMap":137},[142,11190,11191],{"class":144,"line":145},[142,11192,11193],{"class":184},"# Ollama 一键\n",[142,11195,11196,11199,11202],{"class":144,"line":162},[142,11197,11198],{"class":4074},"ollama",[142,11200,11201],{"class":206}," pull",[142,11203,11204],{"class":206}," llama4:8b\n",[142,11206,11207,11209,11212],{"class":144,"line":174},[142,11208,11198],{"class":4074},[142,11210,11211],{"class":206}," run",[142,11213,11204],{"class":206},[142,11215,11216],{"class":144,"line":181},[142,11217,178],{"emptyLinePlaceholder":177},[142,11219,11220],{"class":144,"line":188},[142,11221,11222],{"class":184},"# 或者通过 LM Studio 图形界面\n",[37,11224,11226],{"id":11225},"企业自部署vllm","企业自部署（vLLM）",[132,11228,11230],{"className":4040,"code":11229,"language":4042,"meta":137,"style":137},"vllm serve meta-llama\u002FLlama-4-70B-Instruct \\\n    --tensor-parallel-size 4 \\\n    --max-model-len 131072 \\\n    --enable-prefix-caching \\\n    --quantization awq      # 4-bit 量化省显存\n",[139,11231,11232,11246,11256,11266,11273],{"__ignoreMap":137},[142,11233,11234,11237,11240,11243],{"class":144,"line":145},[142,11235,11236],{"class":4074},"vllm",[142,11238,11239],{"class":206}," serve",[142,11241,11242],{"class":206}," meta-llama\u002FLlama-4-70B-Instruct",[142,11244,11245],{"class":220}," \\\n",[142,11247,11248,11251,11254],{"class":144,"line":162},[142,11249,11250],{"class":220},"    --tensor-parallel-size",[142,11252,11253],{"class":220}," 4",[142,11255,11245],{"class":220},[142,11257,11258,11261,11264],{"class":144,"line":174},[142,11259,11260],{"class":220},"    --max-model-len",[142,11262,11263],{"class":220}," 131072",[142,11265,11245],{"class":220},[142,11267,11268,11271],{"class":144,"line":181},[142,11269,11270],{"class":220},"    --enable-prefix-caching",[142,11272,11245],{"class":220},[142,11274,11275,11278,11281],{"class":144,"line":188},[142,11276,11277],{"class":220},"    --quantization",[142,11279,11280],{"class":206}," awq",[142,11282,11283],{"class":184},"      # 4-bit 量化省显存\n",[30,11285,11286],{},"OpenAI 兼容 API 自动暴露在 8000 端口，所有支持 OpenAI 的工具直接用。",[37,11288,11290],{"id":11289},"云上托管不想自己运维","云上托管（不想自己运维）",[568,11292,11293,11298,11304,11313],{},[571,11294,11295,11297],{},[123,11296,2429],{},"：Llama 4 70B \u002F 405B 都有，按 token 计费但比 Claude 便宜",[571,11299,11300,11303],{},[123,11301,11302],{},"Groq","：Llama 系列的极速 inference 服务，70B 跑 500 tok\u002Fs（业界最快）",[571,11305,11306,865,11309,11312],{},[123,11307,11308],{},"Together AI",[123,11310,11311],{},"Replicate","：开发者友好定价",[571,11314,11315,11318],{},[123,11316,11317],{},"国内","：硅基流动 \u002F 阿里云百炼也有部分 Llama 部署",[26,11320,130],{"id":129},[30,11322,11323],{},"任何 OpenAI 兼容客户端都能用：",[132,11325,11327],{"className":134,"code":11326,"language":136,"meta":137,"style":137},"from openai import OpenAI\n\n# 本地 vLLM \u002F Ollama\nclient = OpenAI(api_key=\"dummy\", base_url=\"http:\u002F\u002Flocalhost:8000\u002Fv1\")\n\n# Groq（云端最快）\nclient = OpenAI(api_key=\"gsk_...\", base_url=\"https:\u002F\u002Fapi.groq.com\u002Fopenai\u002Fv1\")\n\nresp = client.chat.completions.create(\n    model=\"llama-4-70b\",\n    temperature=0.7,\n    messages=[{\"role\": \"user\", \"content\": \"...\"}],\n)\n",[139,11328,11329,11339,11343,11348,11374,11378,11383,11409,11413,11421,11432,11442,11466],{"__ignoreMap":137},[142,11330,11331,11333,11335,11337],{"class":144,"line":145},[142,11332,149],{"class":148},[142,11334,3061],{"class":152},[142,11336,156],{"class":148},[142,11338,3066],{"class":152},[142,11340,11341],{"class":144,"line":162},[142,11342,178],{"emptyLinePlaceholder":177},[142,11344,11345],{"class":144,"line":174},[142,11346,11347],{"class":184},"# 本地 vLLM \u002F Ollama\n",[142,11349,11350,11352,11354,11356,11358,11360,11363,11365,11367,11369,11372],{"class":144,"line":181},[142,11351,165],{"class":152},[142,11353,168],{"class":148},[142,11355,9307],{"class":152},[142,11357,1836],{"class":200},[142,11359,168],{"class":148},[142,11361,11362],{"class":206},"\"dummy\"",[142,11364,263],{"class":152},[142,11366,5319],{"class":200},[142,11368,168],{"class":148},[142,11370,11371],{"class":206},"\"http:\u002F\u002Flocalhost:8000\u002Fv1\"",[142,11373,480],{"class":152},[142,11375,11376],{"class":144,"line":188},[142,11377,178],{"emptyLinePlaceholder":177},[142,11379,11380],{"class":144,"line":197},[142,11381,11382],{"class":184},"# Groq（云端最快）\n",[142,11384,11385,11387,11389,11391,11393,11395,11398,11400,11402,11404,11407],{"class":144,"line":5},[142,11386,165],{"class":152},[142,11388,168],{"class":148},[142,11390,9307],{"class":152},[142,11392,1836],{"class":200},[142,11394,168],{"class":148},[142,11396,11397],{"class":206},"\"gsk_...\"",[142,11399,263],{"class":152},[142,11401,5319],{"class":200},[142,11403,168],{"class":148},[142,11405,11406],{"class":206},"\"https:\u002F\u002Fapi.groq.com\u002Fopenai\u002Fv1\"",[142,11408,480],{"class":152},[142,11410,11411],{"class":144,"line":230},[142,11412,178],{"emptyLinePlaceholder":177},[142,11414,11415,11417,11419],{"class":144,"line":243},[142,11416,1007],{"class":152},[142,11418,168],{"class":148},[142,11420,2874],{"class":152},[142,11422,11423,11425,11427,11430],{"class":144,"line":272},[142,11424,201],{"class":200},[142,11426,168],{"class":148},[142,11428,11429],{"class":206},"\"llama-4-70b\"",[142,11431,210],{"class":152},[142,11433,11434,11436,11438,11440],{"class":144,"line":284},[142,11435,233],{"class":200},[142,11437,168],{"class":148},[142,11439,2258],{"class":220},[142,11441,210],{"class":152},[142,11443,11444,11446,11448,11450,11452,11454,11456,11458,11460,11462,11464],{"class":144,"line":299},[142,11445,246],{"class":200},[142,11447,168],{"class":148},[142,11449,251],{"class":152},[142,11451,254],{"class":206},[142,11453,257],{"class":152},[142,11455,260],{"class":206},[142,11457,263],{"class":152},[142,11459,266],{"class":206},[142,11461,257],{"class":152},[142,11463,4815],{"class":206},[142,11465,442],{"class":152},[142,11467,11468],{"class":144,"line":471},[142,11469,480],{"class":152},[26,11471,492],{"id":492},[30,11473,11474],{},"模型本身免费。成本仅为 GPU 运行费用：",[47,11476,11477,11486],{},[50,11478,11479],{},[53,11480,11481,11483],{},[56,11482,11176],{},[56,11484,11485],{},"月成本估算（中等负载）",[66,11487,11488,11496,11503,11511,11519,11527],{},[53,11489,11490,11493],{},[71,11491,11492],{},"本地（8B，消费级 GPU）",[71,11494,11495],{},"¥0（电费）",[53,11497,11498,11501],{},[71,11499,11500],{},"本地（70B，Mac Studio M3 Max）",[71,11502,11495],{},[53,11504,11505,11508],{},[71,11506,11507],{},"云 GPU（70B，A100 80G）",[71,11509,11510],{},"~¥3,000-5,000\u002F月",[53,11512,11513,11516],{},[71,11514,11515],{},"云 GPU（405B，8×H100）",[71,11517,11518],{},"~¥30,000-50,000\u002F月",[53,11520,11521,11524],{},[71,11522,11523],{},"Groq 70B API",[71,11525,11526],{},"$0.59\u002FM Input · $0.79\u002FM Output",[53,11528,11529,11532],{},[71,11530,11531],{},"AWS Bedrock 70B",[71,11533,11534],{},"$0.72\u002FM Input · $0.72\u002FM Output",[37,11536,11538],{"id":11537},"自部署-roi-计算","自部署 ROI 计算",[30,11540,11541],{},"什么时候自部署划算？",[568,11543,11544,11547,11555],{},[571,11545,11546],{},"如果月 token 用量 > 100 亿，且能接受 70B 而非旗舰：自部署 70B 比走 Claude API 便宜 20-50 倍",[571,11548,11549,11550,865,11552,11554],{},"如果月用量 \u003C 1 亿：直接走 ",[861,11551,680],{"href":3653},[861,11553,2504],{"href":4486}," API 更省事",[571,11556,11557],{},"如果是数据合规驱动（医疗\u002F金融\u002F政府）：自部署是必选项，不算 ROI",[26,11559,11560],{"id":11560},"微调入口",[30,11562,11563],{},"Llama 4 是微调最方便的开源模型，主流 fine-tune 工具链：",[47,11565,11566,11579],{},[50,11567,11568],{},[53,11569,11570,11573,11576],{},[56,11571,11572],{},"工具",[56,11574,11575],{},"适合人群",[56,11577,11578],{},"特点",[66,11580,11581,11596,11607,11618],{},[53,11582,11583,11590,11593],{},[71,11584,11585],{},[861,11586,11589],{"href":11587,"rel":11588},"https:\u002F\u002Fgithub.com\u002Funslothai\u002Funsloth",[3857],"Unsloth",[71,11591,11592],{},"个人 \u002F 小团队",[71,11594,11595],{},"单卡 7B QLoRA 几小时",[53,11597,11598,11601,11604],{},[71,11599,11600],{},"Axolotl",[71,11602,11603],{},"企业",[71,11605,11606],{},"配置化、多任务",[53,11608,11609,11612,11615],{},[71,11610,11611],{},"torchtune",[71,11613,11614],{},"研究",[71,11616,11617],{},"PyTorch 官方，灵活",[53,11619,11620,11623,11626],{},[71,11621,11622],{},"TRL",[71,11624,11625],{},"RLHF \u002F DPO",[71,11627,11628],{},"HuggingFace 出品",[30,11630,11631,11632,2113,11634,11638],{},"详见 ",[861,11633,3592],{"href":3591},[861,11635,11637],{"href":11636},"\u002Fwiki\u002Ffine-tuning-vs-rag.html","Fine-tuning vs RAG","——大多数业务问题用 RAG 解决，确实需要\"教模型新行为\"才上微调。",[26,11640,7011],{"id":7011},[47,11642,11643,11657],{},[50,11644,11645],{},[53,11646,11647,11649,11651,11653,11655],{},[56,11648,1451],{},[56,11650,10972],{},[56,11652,2504],{},[56,11654,680],{},[56,11656,4347],{},[66,11658,11659,11673,11688,11700,11712,11725,11738],{},[53,11660,11661,11663,11665,11668,11671],{},[71,11662,2987],{},[71,11664,4402],{},[71,11666,11667],{},"部分（GLM-4 系列开源）",[71,11669,11670],{},"✅ MoE",[71,11672,4408],{},[53,11674,11675,11678,11681,11684,11686],{},[71,11676,11677],{},"商用",[71,11679,11680],{},"✅（\u003C 7 亿 MAU）",[71,11682,11683],{},"需授权",[71,11685,3517],{},[71,11687,1522],{},[53,11689,11690,11692,11694,11696,11698],{},[71,11691,1471],{},[71,11693,1601],{},[71,11695,692],{},[71,11697,692],{},[71,11699,1601],{},[53,11701,11702,11704,11706,11708,11710],{},[71,11703,8073],{},[71,11705,1601],{},[71,11707,644],{},[71,11709,692],{},[71,11711,692],{},[53,11713,11714,11717,11719,11721,11723],{},[71,11715,11716],{},"多语言",[71,11718,692],{},[71,11720,1601],{},[71,11722,1601],{},[71,11724,644],{},[53,11726,11727,11730,11732,11734,11736],{},[71,11728,11729],{},"社区",[71,11731,644],{},[71,11733,1601],{},[71,11735,692],{},[71,11737,644],{},[53,11739,11740,11743,11746,11749,11752],{},[71,11741,11742],{},"端侧模型",[71,11744,11745],{},"8B 一档",[71,11747,11748],{},"9B 一档",[71,11750,11751],{},"蒸馏版",[71,11753,11754],{},"0.5B-14B 全覆盖",[30,11756,11757,2617],{},[123,11758,3464],{},[568,11760,11761,11764,11771,11776],{},[571,11762,11763],{},"需要私有化部署且中文要求不高 → Llama 4（社区生态最强）",[571,11765,11766,11767,9010,11769],{},"中文场景 → ",[861,11768,2504],{"href":4486},[861,11770,4347],{"href":4489},[571,11772,11773,11774],{},"极致低成本（API） → ",[861,11775,680],{"href":3653},[571,11777,11778],{},"端侧多尺寸 → Qwen 3",[26,11780,6036],{"id":6035},[30,11782,788],{},[568,11784,11785,11788,11791,11794,11802,11805],{},[571,11786,11787],{},"企业私有化部署（数据不出内网）",[571,11789,11790],{},"模型微调研究 \u002F 实验",[571,11792,11793],{},"海外多语言应用",[571,11795,11796,11797,3593],{},"角色扮演 \u002F 内容创作（无审查限制，",[861,11798,11801],{"href":11799,"rel":11800},"https:\u002F\u002Fhuggingface.co\u002Fmodels?search=llama+uncensored",[3857],"Hugging Face 上有大量 uncensored fine-tune",[571,11803,11804],{},"学术研究 \u002F 教学",[571,11806,11807],{},"AI 产品 PoC（先用 Llama 验证，再迁到 API）",[30,11809,808],{},[568,11811,11812,11815,11818,11821],{},[571,11813,11814],{},"主力编程（不如 Claude \u002F GLM）",[571,11816,11817],{},"纯中文场景（不如国产）",[571,11819,11820],{},"团队没运维能力（API 模型更省心）",[571,11822,11823],{},"极致质量场景（开源旗舰仍落后闭源旗舰半代）",[26,11825,825],{"id":825},[568,11827,11828,11834,11840,11853,11861,11867,11873],{},[571,11829,11830,11833],{},[123,11831,11832],{},"8B \u002F 70B \u002F 405B 跨档差距巨大","：用 8B 跑不起来的任务不是 Llama 不行，是参数量不够，先升 70B 再下结论。",[571,11835,11836,11839],{},[123,11837,11838],{},"量化精度不要太低","：q4 \u002F Q4_K_M 是底线，q2 \u002F q3 质量崩坏严重；fp16 \u002F Q8_0 才是真实质量。",[571,11841,11842,11848,11849,11852],{},[123,11843,11844,11847],{},[139,11845,11846],{},"Llama-4-*-Instruct"," vs base","：基础模型（base）不会聊天，必须用 ",[139,11850,11851],{},"-Instruct"," 版本；新手常踩。",[571,11854,11855,11858,11859,126],{},[123,11856,11857],{},"Scout 10M 上下文别迷信","：超过 1M 后实测召回率断崖，长文场景仍推荐 ",[861,11860,1561],{"href":5445},[571,11862,11863,11866],{},[123,11864,11865],{},"中文场景慎用","：训练语料中文占比低，中文流畅度不如国产；要在中文上用 Llama，先看有没有中文社区微调版本（如 Llama-4-Chinese）。",[571,11868,11869,11872],{},[123,11870,11871],{},"Llama License 不是 MIT","：商用前看条款（7 亿 MAU 限制、不得用 Llama 输出训练非 Llama 模型等）。",[571,11874,11875,11878],{},[123,11876,11877],{},"Groq 限流严格","：免费档 RPM 很低，生产用要付费升级。",[26,11880,854],{"id":854},[568,11882,11883,11890,11895,11900,11906],{},[571,11884,11885,11886,865,11888],{},"本地部署：",[861,11887,3865],{"href":3864},[861,11889,11151],{"href":11150},[571,11891,11892,11893],{},"微调方法：",[861,11894,3592],{"href":3591},[571,11896,11897,11898],{},"何时微调：",[861,11899,11637],{"href":11636},[571,11901,6144,11902,865,11904],{},[861,11903,680],{"href":3653},[861,11905,4347],{"href":4489},[571,11907,11908,11909],{},"推理优化：",[861,11910,876],{"href":875},[885,11912,11913],{},"html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}",{"title":137,"searchDepth":174,"depth":174,"links":11915},[11916,11917,11923,11928,11929,11932,11933,11934,11935,11936],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":11918},[11919,11920,11921,11922],{"id":10998,"depth":174,"text":10998},{"id":11034,"depth":174,"text":11034},{"id":11125,"depth":174,"text":11126},{"id":5137,"depth":174,"text":5137},{"id":11176,"depth":162,"text":11176,"children":11924},[11925,11926,11927],{"id":11179,"depth":174,"text":11180},{"id":11225,"depth":174,"text":11226},{"id":11289,"depth":174,"text":11290},{"id":129,"depth":162,"text":130},{"id":492,"depth":162,"text":492,"children":11930},[11931],{"id":11537,"depth":174,"text":11538},{"id":11560,"depth":162,"text":11560},{"id":7011,"depth":162,"text":7011},{"id":6035,"depth":162,"text":6036},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},"Meta Llama 4 开源旗舰大模型，完全免费可商用 + 多模态原生支持，HuggingFace 社区生态与微调工具链最完整，权重开放支持 LoRA \u002F 全参微调与私有化部署，适合学术研究、企业自托管与合规敏感场景。",{},"\u002Fmodels\u002Fllama-4","开源免费（自行部署成本仅 GPU）",[3700,11942],"coding\u002Flocal\u002Flm-studio","2025-07-23",{"title":10972,"description":11937},"llama-4","models\u002Fllama-4",[11948,11949,11950,11951,11952],"完全开源，可商用，无使用限制","Scout 版本 10M 超长上下文（实验性）","社区生态最强，工具链完善","多尺寸可选（8B\u002F70B\u002F405B\u002FMaverick\u002FScout）","支持多语言",[11954,11955,11956,11957],"私有化部署（数据不出企业）","学术研究与模型微调","低成本批量处理","定制化模型开发","Meta",[11960,11961,11962,11963],"编程能力弱于 Claude\u002FGPT\u002FGLM","需要自行部署，运维门槛高","10M 上下文为实验性，质量不稳定","中文能力不如国产模型","mQZsmfQMT0dg8BGW1xKfxA6rDL_Y4JowE3kZfNPZXCA",{"id":11966,"title":4347,"apiCompatible":11967,"benchmarks":11968,"body":11974,"category":907,"contextWindow":12988,"description":12989,"extension":910,"maxOutput":911,"meta":12990,"navigation":177,"path":12991,"pricing":12992,"published":915,"relatedTools":12993,"releaseDate":12994,"seo":12995,"slug":12996,"stem":12997,"strengths":12998,"updated":915,"useCases":13004,"vendor":13009,"vendorEn":13010,"weaknesses":13011,"__hash__":13015},"models\u002Fmodels\u002Fqwen-3.md",[2752],[11969,11970,11971,11973],{"name":14,"score":4373},{"name":17,"score":5196},{"name":20,"score":11972},"83.2%",{"name":3733,"score":5210},{"type":23,"value":11975,"toc":12967},[11976,11978,11981,11983,11986,12053,12060,12064,12075,12164,12170,12172,12175,12178,12181,12183,12186,12311,12315,12342,12345,12413,12417,12420,12459,12462,12464,12557,12562,12564,12603,12609,12611,12697,12700,12808,12812,12829,12832,12835,12887,12890,12892,12940,12942,12965],[26,11977,28],{"id":28},[30,11979,11980],{},"Qwen 3 是阿里巴巴于 2025 年 9 月发布的第三代通义千问模型系列，覆盖从 0.5B 到 235B 的全尺寸。最大特色是端侧小模型（7B\u002F14B）在同参数级别中性能领先，适合本地部署和移动端使用。",[26,11982,35],{"id":35},[37,11984,11985],{"id":11985},"全尺寸开源",[47,11987,11988,11998],{},[50,11989,11990],{},[53,11991,11992,11994,11996],{},[56,11993,11046],{},[56,11995,2458],{},[56,11997,3006],{},[66,11999,12000,12011,12021,12031,12042],{},[53,12001,12002,12005,12008],{},[71,12003,12004],{},"0.5B \u002F 1.5B",[71,12006,12007],{},"手机端、IoT、浏览器 WASM",[71,12009,12010],{},"\u003C 2GB",[53,12012,12013,12016,12019],{},[71,12014,12015],{},"7B \u002F 14B",[71,12017,12018],{},"本地开发、个人使用",[71,12020,11063],{},[53,12022,12023,12026,12029],{},[71,12024,12025],{},"32B \u002F 72B",[71,12027,12028],{},"企业部署",[71,12030,11077],{},[53,12032,12033,12036,12039],{},[71,12034,12035],{},"Qwen3-MoE-A14B-72B",[71,12037,12038],{},"性价比之选",[71,12040,12041],{},"64-96GB",[53,12043,12044,12047,12050],{},[71,12045,12046],{},"235B",[71,12048,12049],{},"云端 API",[71,12051,12052],{},"多卡 H100",[30,12054,12055,12056,12059],{},"端侧 7B 模型在 MMLU 上达到 75%+，足以胜任日常对话、简单编程、文档摘要等任务。",[123,12057,12058],{},"这是 Qwen 系列最大的差异化优势","——你能拿 7B 跑出接近 GPT-3.5 体验的本地模型，对隐私敏感的个人和企业是刚需。",[37,12061,12063],{"id":12062},"hybrid-thinking混合推理模式","Hybrid Thinking（混合推理模式）",[30,12065,12066,12067,12070,12071,12074],{},"Qwen 3 引入了",[123,12068,12069],{},"可切换的推理模式","——同一个模型，加 ",[139,12072,12073],{},"enable_thinking=True"," 就变推理模型：",[132,12076,12078],{"className":134,"code":12077,"language":136,"meta":137,"style":137},"resp = client.chat.completions.create(\n    model=\"qwen3-235b-a22b\",\n    messages=[{\"role\": \"user\", \"content\": \"证明...\"}],\n    extra_body={\n        \"enable_thinking\": True,    # 开 thinking\n    },\n)\n# resp.choices[0].message.reasoning_content 是思维链\n# resp.choices[0].message.content 是最终答案\n",[139,12079,12080,12088,12099,12123,12132,12146,12150,12154,12159],{"__ignoreMap":137},[142,12081,12082,12084,12086],{"class":144,"line":145},[142,12083,1007],{"class":152},[142,12085,168],{"class":148},[142,12087,2874],{"class":152},[142,12089,12090,12092,12094,12097],{"class":144,"line":162},[142,12091,201],{"class":200},[142,12093,168],{"class":148},[142,12095,12096],{"class":206},"\"qwen3-235b-a22b\"",[142,12098,210],{"class":152},[142,12100,12101,12103,12105,12107,12109,12111,12113,12115,12117,12119,12121],{"class":144,"line":174},[142,12102,246],{"class":200},[142,12104,168],{"class":148},[142,12106,251],{"class":152},[142,12108,254],{"class":206},[142,12110,257],{"class":152},[142,12112,260],{"class":206},[142,12114,263],{"class":152},[142,12116,266],{"class":206},[142,12118,257],{"class":152},[142,12120,2912],{"class":206},[142,12122,442],{"class":152},[142,12124,12125,12128,12130],{"class":144,"line":181},[142,12126,12127],{"class":200},"    extra_body",[142,12129,168],{"class":148},[142,12131,5639],{"class":152},[142,12133,12134,12137,12139,12141,12143],{"class":144,"line":188},[142,12135,12136],{"class":206},"        \"enable_thinking\"",[142,12138,257],{"class":152},[142,12140,2217],{"class":220},[142,12142,8298],{"class":152},[142,12144,12145],{"class":184},"# 开 thinking\n",[142,12147,12148],{"class":144,"line":197},[142,12149,5743],{"class":152},[142,12151,12152],{"class":144,"line":5},[142,12153,480],{"class":152},[142,12155,12156],{"class":144,"line":230},[142,12157,12158],{"class":184},"# resp.choices[0].message.reasoning_content 是思维链\n",[142,12160,12161],{"class":144,"line":243},[142,12162,12163],{"class":184},"# resp.choices[0].message.content 是最终答案\n",[30,12165,12166,12167,12169],{},"对比 ",[861,12168,2750],{"href":4445}," 那种\"必须用单独模型\"，Qwen 3 一个模型双模式更省事——简单任务关 thinking 快、复杂题开 thinking 强。",[37,12171,11716],{"id":11716},[30,12173,12174],{},"中英日韩阿法德七种语言均衡发展，不像某些模型偏科中文或英文。对于需要多语言支持的应用（跨境电商、国际客服），Qwen 3 是理想选择。覆盖语种数 119 种，业界最广。",[37,12176,12177],{"id":12177},"阿里云生态",[30,12179,12180],{},"通过阿里云百炼平台可以直接调用，与其他阿里云服务（OSS、RDS、函数计算）集成方便。免费额度：个人开发者每月 200 万 token。",[26,12182,130],{"id":129},[37,12184,12185],{"id":12185},"阿里云百炼平台",[132,12187,12189],{"className":134,"code":12188,"language":136,"meta":137,"style":137},"from openai import OpenAI\n\nclient = OpenAI(\n    api_key=\"sk-...\",\n    base_url=\"https:\u002F\u002Fdashscope.aliyuncs.com\u002Fcompatible-mode\u002Fv1\",\n)\n\nresp = client.chat.completions.create(\n    model=\"qwen-plus\",       # 旗舰别名（背后是 Qwen3-235B）\n    temperature=0.3,\n    messages=[\n        {\"role\": \"user\", \"content\": \"...\"},\n    ],\n)\n",[139,12190,12191,12201,12205,12213,12223,12234,12238,12242,12250,12265,12275,12283,12303,12307],{"__ignoreMap":137},[142,12192,12193,12195,12197,12199],{"class":144,"line":145},[142,12194,149],{"class":148},[142,12196,3061],{"class":152},[142,12198,156],{"class":148},[142,12200,3066],{"class":152},[142,12202,12203],{"class":144,"line":162},[142,12204,178],{"emptyLinePlaceholder":177},[142,12206,12207,12209,12211],{"class":144,"line":174},[142,12208,165],{"class":152},[142,12210,168],{"class":148},[142,12212,3075],{"class":152},[142,12214,12215,12217,12219,12221],{"class":144,"line":181},[142,12216,3080],{"class":200},[142,12218,168],{"class":148},[142,12220,3085],{"class":206},[142,12222,210],{"class":152},[142,12224,12225,12227,12229,12232],{"class":144,"line":188},[142,12226,3092],{"class":200},[142,12228,168],{"class":148},[142,12230,12231],{"class":206},"\"https:\u002F\u002Fdashscope.aliyuncs.com\u002Fcompatible-mode\u002Fv1\"",[142,12233,210],{"class":152},[142,12235,12236],{"class":144,"line":197},[142,12237,480],{"class":152},[142,12239,12240],{"class":144,"line":5},[142,12241,178],{"emptyLinePlaceholder":177},[142,12243,12244,12246,12248],{"class":144,"line":230},[142,12245,1007],{"class":152},[142,12247,168],{"class":148},[142,12249,2874],{"class":152},[142,12251,12252,12254,12256,12259,12262],{"class":144,"line":243},[142,12253,201],{"class":200},[142,12255,168],{"class":148},[142,12257,12258],{"class":206},"\"qwen-plus\"",[142,12260,12261],{"class":152},",       ",[142,12263,12264],{"class":184},"# 旗舰别名（背后是 Qwen3-235B）\n",[142,12266,12267,12269,12271,12273],{"class":144,"line":272},[142,12268,233],{"class":200},[142,12270,168],{"class":148},[142,12272,4868],{"class":220},[142,12274,210],{"class":152},[142,12276,12277,12279,12281],{"class":144,"line":284},[142,12278,246],{"class":200},[142,12280,168],{"class":148},[142,12282,342],{"class":152},[142,12284,12285,12287,12289,12291,12293,12295,12297,12299,12301],{"class":144,"line":299},[142,12286,1292],{"class":152},[142,12288,254],{"class":206},[142,12290,257],{"class":152},[142,12292,260],{"class":206},[142,12294,263],{"class":152},[142,12296,266],{"class":206},[142,12298,257],{"class":152},[142,12300,4815],{"class":206},[142,12302,1064],{"class":152},[142,12304,12305],{"class":144,"line":471},[142,12306,1321],{"class":152},[142,12308,12309],{"class":144,"line":477},[142,12310,480],{"class":152},[37,12312,12314],{"id":12313},"本地-ollama-部署无需联网","本地 Ollama 部署（无需联网）",[132,12316,12318],{"className":4040,"code":12317,"language":4042,"meta":137,"style":137},"# 一键拉取 7B 模型，笔记本能跑\nollama pull qwen3:7b\nollama run qwen3:7b\n",[139,12319,12320,12325,12334],{"__ignoreMap":137},[142,12321,12322],{"class":144,"line":145},[142,12323,12324],{"class":184},"# 一键拉取 7B 模型，笔记本能跑\n",[142,12326,12327,12329,12331],{"class":144,"line":162},[142,12328,11198],{"class":4074},[142,12330,11201],{"class":206},[142,12332,12333],{"class":206}," qwen3:7b\n",[142,12335,12336,12338,12340],{"class":144,"line":174},[142,12337,11198],{"class":4074},[142,12339,11211],{"class":206},[142,12341,12333],{"class":206},[30,12343,12344],{},"然后通过 OpenAI 兼容 API 接入任何工具：",[132,12346,12348],{"className":134,"code":12347,"language":136,"meta":137,"style":137},"client = OpenAI(\n    api_key=\"ollama\",\n    base_url=\"http:\u002F\u002Flocalhost:11434\u002Fv1\",\n)\nresp = client.chat.completions.create(model=\"qwen3:7b\", messages=[...])\n",[139,12349,12350,12358,12369,12380,12384],{"__ignoreMap":137},[142,12351,12352,12354,12356],{"class":144,"line":145},[142,12353,165],{"class":152},[142,12355,168],{"class":148},[142,12357,3075],{"class":152},[142,12359,12360,12362,12364,12367],{"class":144,"line":162},[142,12361,3080],{"class":200},[142,12363,168],{"class":148},[142,12365,12366],{"class":206},"\"ollama\"",[142,12368,210],{"class":152},[142,12370,12371,12373,12375,12378],{"class":144,"line":174},[142,12372,3092],{"class":200},[142,12374,168],{"class":148},[142,12376,12377],{"class":206},"\"http:\u002F\u002Flocalhost:11434\u002Fv1\"",[142,12379,210],{"class":152},[142,12381,12382],{"class":144,"line":181},[142,12383,480],{"class":152},[142,12385,12386,12388,12390,12393,12395,12397,12400,12402,12404,12406,12408,12410],{"class":144,"line":188},[142,12387,1007],{"class":152},[142,12389,168],{"class":148},[142,12391,12392],{"class":152}," client.chat.completions.create(",[142,12394,4915],{"class":200},[142,12396,168],{"class":148},[142,12398,12399],{"class":206},"\"qwen3:7b\"",[142,12401,263],{"class":152},[142,12403,2638],{"class":200},[142,12405,168],{"class":148},[142,12407,3804],{"class":152},[142,12409,1939],{"class":220},[142,12411,12412],{"class":152},"])\n",[37,12414,12416],{"id":12415},"vllm-高性能服务","vLLM 高性能服务",[30,12418,12419],{},"企业部署推荐 vLLM：",[132,12421,12423],{"className":4040,"code":12422,"language":4042,"meta":137,"style":137},"vllm serve Qwen\u002FQwen3-72B \\\n    --tensor-parallel-size 4 \\\n    --max-model-len 131072 \\\n    --enable-prefix-caching   # 开 prompt cache\n",[139,12424,12425,12436,12444,12452],{"__ignoreMap":137},[142,12426,12427,12429,12431,12434],{"class":144,"line":145},[142,12428,11236],{"class":4074},[142,12430,11239],{"class":206},[142,12432,12433],{"class":206}," Qwen\u002FQwen3-72B",[142,12435,11245],{"class":220},[142,12437,12438,12440,12442],{"class":144,"line":162},[142,12439,11250],{"class":220},[142,12441,11253],{"class":220},[142,12443,11245],{"class":220},[142,12445,12446,12448,12450],{"class":144,"line":174},[142,12447,11260],{"class":220},[142,12449,11263],{"class":220},[142,12451,11245],{"class":220},[142,12453,12454,12456],{"class":144,"line":181},[142,12455,11270],{"class":220},[142,12457,12458],{"class":184},"   # 开 prompt cache\n",[30,12460,12461],{},"实测 4×A100 跑 72B 单并发 30 tok\u002Fs，10 并发总吞吐 200+ tok\u002Fs——比走 API 长期看更省钱。",[26,12463,4923],{"id":4923},[47,12465,12466,12482],{},[50,12467,12468],{},[53,12469,12470,12473,12476,12478,12480],{},[56,12471,12472],{},"别名（百炼）",[56,12474,12475],{},"实际型号",[56,12477,515],{},[56,12479,529],{},[56,12481,2526],{},[66,12483,12484,12498,12512,12528,12541],{},[53,12485,12486,12489,12492,12494,12496],{},[71,12487,12488],{},"qwen-turbo",[71,12490,12491],{},"Qwen3-30B-MoE",[71,12493,4964],{},[71,12495,4967],{},[71,12497,2537],{},[53,12499,12500,12503,12506,12508,12510],{},[71,12501,12502],{},"qwen-plus",[71,12504,12505],{},"Qwen3-72B \u002F 235B",[71,12507,4951],{},[71,12509,686],{},[71,12511,2537],{},[53,12513,12514,12517,12520,12523,12526],{},[71,12515,12516],{},"qwen-max",[71,12518,12519],{},"Qwen3-Max",[71,12521,12522],{},"¥2.4\u002FM",[71,12524,12525],{},"¥9.6\u002FM",[71,12527,1502],{},[53,12529,12530,12533,12535,12537,12539],{},[71,12531,12532],{},"qwen-long",[71,12534,4626],{},[71,12536,5005],{},[71,12538,686],{},[71,12540,4629],{},[53,12542,12543,12546,12549,12552,12555],{},[71,12544,12545],{},"qwen-vl-plus",[71,12547,12548],{},"Qwen3-VL",[71,12550,12551],{},"¥1.5\u002FM",[71,12553,12554],{},"¥4.5\u002FM",[71,12556,1502],{},[30,12558,12559,12561],{},[139,12560,12532],{}," 上下文 1000 万 token——业界最长，专门做超长文档场景。",[26,12563,492],{"id":492},[47,12565,12566,12574],{},[50,12567,12568],{},[53,12569,12570,12572],{},[56,12571,501],{},[56,12573,1362],{},[66,12575,12576,12583,12590,12597],{},[53,12577,12578,12581],{},[71,12579,12580],{},"Input (qwen-plus)",[71,12582,5038],{},[53,12584,12585,12588],{},[71,12586,12587],{},"Output (qwen-plus)",[71,12589,4218],{},[53,12591,12592,12594],{},[71,12593,5049],{},[71,12595,12596],{},"每月 200 万 token",[53,12598,12599,12601],{},[71,12600,5760],{},[71,12602,9747],{},[30,12604,12605,12606,12608],{},"是 ",[861,12607,680],{"href":3653}," 之后第二便宜的选项。",[26,12610,2222],{"id":2222},[47,12612,12613,12623],{},[50,12614,12615],{},[53,12616,12617,12619,12621],{},[56,12618,2231],{},[56,12620,713],{},[56,12622,4109],{},[66,12624,12625,12636,12647,12659,12671,12684],{},[53,12626,12627,12631,12633],{},[71,12628,12629],{},[139,12630,1344],{},[71,12632,7665],{},[71,12634,12635],{},"通用",[53,12637,12638,12642,12644],{},[71,12639,12640],{},[139,12641,1344],{},[71,12643,238],{},[71,12645,12646],{},"工具调用 \u002F 代码",[53,12648,12649,12653,12656],{},[71,12650,12651],{},[139,12652,4140],{},[71,12654,12655],{},"0.8",[71,12657,12658],{},"默认",[53,12660,12661,12665,12668],{},[71,12662,12663],{},[139,12664,2279],{},[71,12666,12667],{},"显式",[71,12669,12670],{},"默认 1500 太短",[53,12672,12673,12678,12681],{},[71,12674,12675],{},[139,12676,12677],{},"enable_thinking",[71,12679,12680],{},"False \u002F True",[71,12682,12683],{},"是否开推理模式",[53,12685,12686,12691,12694],{},[71,12687,12688],{},[139,12689,12690],{},"repetition_penalty",[71,12692,12693],{},"1.05",[71,12695,12696],{},"防止重复",[26,12698,12699],{"id":12699},"与同档国产模型怎么选",[47,12701,12702,12716],{},[50,12703,12704],{},[53,12705,12706,12708,12710,12712,12714],{},[56,12707,1451],{},[56,12709,4347],{},[56,12711,2504],{},[56,12713,680],{},[56,12715,4618],{},[66,12717,12718,12730,12742,12756,12770,12782,12794],{},[53,12719,12720,12722,12724,12726,12728],{},[71,12721,1471],{},[71,12723,1601],{},[71,12725,692],{},[71,12727,692],{},[71,12729,1601],{},[53,12731,12732,12734,12736,12738,12740],{},[71,12733,11716],{},[71,12735,644],{},[71,12737,1601],{},[71,12739,1601],{},[71,12741,1601],{},[53,12743,12744,12747,12749,12751,12754],{},[71,12745,12746],{},"端侧部署",[71,12748,644],{},[71,12750,1601],{},[71,12752,12753],{},"★☆☆☆☆",[71,12755,2601],{},[53,12757,12758,12761,12763,12765,12768],{},[71,12759,12760],{},"Hybrid Thinking",[71,12762,1522],{},[71,12764,2601],{},[71,12766,12767],{},"分别用 R1",[71,12769,2601],{},[53,12771,12772,12774,12776,12778,12780],{},[71,12773,5227],{},[71,12775,4951],{},[71,12777,686],{},[71,12779,683],{},[71,12781,3529],{},[53,12783,12784,12786,12788,12790,12792],{},[71,12785,2987],{},[71,12787,4408],{},[71,12789,4405],{},[71,12791,11670],{},[71,12793,2601],{},[53,12795,12796,12799,12801,12803,12805],{},[71,12797,12798],{},"国内云生态",[71,12800,5264],{},[71,12802,5258],{},[71,12804,5261],{},[71,12806,12807],{},"Moonshot",[30,12809,12810,2617],{},[123,12811,3464],{},[568,12813,12814,12817,12820,12823,12826],{},[571,12815,12816],{},"本地部署 \u002F 端侧 → Qwen 3（7B 是同档最强）",[571,12818,12819],{},"多语言应用 → Qwen 3",[571,12821,12822],{},"主力编程 → GLM-5.2",[571,12824,12825],{},"极致低成本 → DeepSeek-V3",[571,12827,12828],{},"长文档分析 → Kimi K2 \u002F qwen-long",[26,12830,12831],{"id":12831},"端侧场景的真实价值",[30,12833,12834],{},"7B 模型本地能跑意味着：",[47,12836,12837,12845],{},[50,12838,12839],{},[53,12840,12841,12843],{},[56,12842,58],{},[56,12844,6943],{},[66,12846,12847,12855,12863,12871,12879],{},[53,12848,12849,12852],{},[71,12850,12851],{},"个人助手",[71,12853,12854],{},"笔记\u002F邮件\u002F搜索本地处理，数据不出本机",[53,12856,12857,12860],{},[71,12858,12859],{},"IoT 设备",[71,12861,12862],{},"离线语音对话、智能音箱",[53,12864,12865,12868],{},[71,12866,12867],{},"浏览器扩展",[71,12869,12870],{},"离线网页摘要 \u002F 翻译",[53,12872,12873,12876],{},[71,12874,12875],{},"移动 App",[71,12877,12878],{},"iOS\u002FAndroid 端侧推理",[53,12880,12881,12884],{},[71,12882,12883],{},"企业边缘",[71,12885,12886],{},"工厂 \u002F 医院本地部署",[30,12888,12889],{},"Qwen 3 是国内做这类场景的事实标准。",[26,12891,825],{"id":825},[568,12893,12894,12906,12914,12920,12926,12934],{},[571,12895,12896,12901,12902,12905],{},[123,12897,12898,12900],{},[139,12899,12502],{}," 别名背后型号变化","：阿里偶尔切换后台模型，行为可能小幅变化。生产用 ",[139,12903,12904],{},"qwen3-235b-a22b"," 这种带版本的具体型号。",[571,12907,12908,12913],{},[123,12909,12910,12912],{},[139,12911,12073],{}," 不要白开","：简单任务开了变慢且更贵，按需启用。",[571,12915,12916,12919],{},[123,12917,12918],{},"多语言不等于翻译","：跨语言生成可能掺杂代码切换问题，重要场景仍建议用专业翻译模型。",[571,12921,12922,12925],{},[123,12923,12924],{},"本地 Ollama 量化精度","：默认 q4 量化质量损失明显，重要场景用 q8 或 fp16。",[571,12927,12928,12933],{},[123,12929,12930,12932],{},[139,12931,12532],{}," 价格陷阱","：上下文越长单次成本越高，10M 上下文用一次几十块。",[571,12935,12936,12939],{},[123,12937,12938],{},"Batch API 24h 出结果","：实时任务别走 batch。",[26,12941,854],{"id":854},[568,12943,12944,12949,12954,12960],{},[571,12945,11885,12946,12948],{},[861,12947,3592],{"href":3591},"（在 Qwen 上微调）",[571,12950,12951,12952],{},"推理模式原理：",[861,12953,1349],{"href":1677},[571,12955,6144,12956,865,12958],{},[861,12957,680],{"href":3653},[861,12959,2504],{"href":4486},[571,12961,12962,12963],{},"多语言场景：",[861,12964,5371],{"href":5370},[885,12966,4502],{},{"title":137,"searchDepth":174,"depth":174,"links":12968},[12969,12970,12976,12981,12982,12983,12984,12985,12986,12987],{"id":28,"depth":162,"text":28},{"id":35,"depth":162,"text":35,"children":12971},[12972,12973,12974,12975],{"id":11985,"depth":174,"text":11985},{"id":12062,"depth":174,"text":12063},{"id":11716,"depth":174,"text":11716},{"id":12177,"depth":174,"text":12177},{"id":129,"depth":162,"text":130,"children":12977},[12978,12979,12980],{"id":12185,"depth":174,"text":12185},{"id":12313,"depth":174,"text":12314},{"id":12415,"depth":174,"text":12416},{"id":4923,"depth":162,"text":4923},{"id":492,"depth":162,"text":492},{"id":2222,"depth":162,"text":2222},{"id":12699,"depth":162,"text":12699},{"id":12831,"depth":162,"text":12831},{"id":825,"depth":162,"text":825},{"id":854,"depth":162,"text":854},131072,"阿里通义千问 Qwen 3 系列开源大模型，0.5B 至 235B 全尺寸覆盖，端侧小模型性能业界领先，多语言（119 种）能力广泛，国内可直连 + 慷慨免费 API 额度，适合从边缘设备到云端 GPU 的全栈部署。",{},"\u002Fmodels\u002Fqwen-3","Input ¥0.8\u002FM · Output ¥2\u002FM（免费 API 额度）",[3700,11942,3699],"2025-09-25",{"title":4347,"description":12989},"qwen-3","models\u002Fqwen-3",[12999,13000,13001,13002,13003],"开源全系列（0.5B 到 235B），覆盖端侧到云端","多语言能力强，中英日韩阿法德表现均衡","免费 API 额度大方，阿里云百炼平台可直接调用","端侧小模型（7B\u002F14B）性能领先，适合本地部署","支持长文档理解和代码生成",[13005,13006,13007,13008],"端侧 AI 部署（手机\u002FIoT\u002F本地）","多语言应用开发","低成本 API 调用","企业私有化部署","阿里巴巴","Alibaba",[13012,13013,938,13014],"编程实操不如 Claude Sonnet 4 \u002F GLM-5.2","Agent 工具调用生态不如 Anthropic 成熟","多步推理稳定性一般","sRIwsG5ksoXekmio_f5Yx0BZ-xFnNldjlIysLnb_h74",{"tools":4,"reviews":5,"playbooks":272,"news":230},1782316491397]