[{"data":1,"prerenderedAt":822},["ShallowReactive",2],{"header-counts":3,"footer-counts":6,"wiki-embedding":9},{"tools":4,"reviews":5},65,7,{"tools":4,"reviews":5,"playbooks":7,"news":8},10,8,{"id":10,"title":11,"body":12,"category":801,"description":49,"extension":802,"meta":803,"navigation":804,"path":805,"published":806,"relatedModels":807,"relatedTools":809,"seo":812,"slug":813,"stem":814,"summary":815,"tags":816,"updated":806,"__hash__":821},"wiki\u002Fwiki\u002Fembedding.md","Embedding（向量嵌入）",{"type":13,"value":14,"toc":764},"minimark",[15,20,24,32,36,39,50,53,58,61,65,68,72,80,84,87,91,210,213,234,244,247,250,300,306,310,313,373,376,380,383,459,465,469,476,480,487,493,496,500,507,519,526,529,533,536,547,551,554,558,561,565,568,579,582,675,679,682,702,705,709,712,716,719,723,730,734,737,740],[16,17,19],"h2",{"id":18},"什么是-embedding","什么是 Embedding",[21,22,23],"p",{},"Embedding（向量嵌入）是把文本、图片、音频等数据转换成一组数字（向量）的过程。这组数字保留了原始数据的语义信息——语义相近的内容，向量距离也近。",[21,25,26,27,31],{},"简单说：",[28,29,30],"strong",{},"把\"意思\"变成\"坐标\"","。",[16,33,35],{"id":34},"为什么需要-embedding","为什么需要 Embedding",[21,37,38],{},"计算机不能直接理解\"猫和老虎比猫和桌子更像\"。但如果把每个词映射到高维空间中的一个点，让\"猫\"和\"老虎\"的坐标接近、\"猫\"和\"桌子\"的坐标远离，计算机就能通过计算距离来判断语义相似度。",[40,41,46],"pre",{"className":42,"code":44,"language":45},[43],"language-text","猫   → [0.21, -0.35, 0.88, ...]  ← 768 维向量\n老虎 → [0.19, -0.31, 0.91, ...]  ← 与\"猫\"距离很近\n桌子 → [-0.72, 0.55, -0.12, ...] ← 与\"猫\"距离很远\n","text",[47,48,44],"code",{"__ignoreMap":49},"",[16,51,52],{"id":52},"工作原理",[54,55,57],"h3",{"id":56},"_1-文本-token","1. 文本 → Token",[21,59,60],{},"先把文本切分成 token（与 LLM 一样的分词方式）。",[54,62,64],{"id":63},"_2-token-向量","2. Token → 向量",[21,66,67],{},"通过预训练的 Embedding 模型，把每个 token 映射到高维空间。",[54,69,71],{"id":70},"_3-聚合","3. 聚合",[21,73,74,75,79],{},"把一段文本所有 token 的向量聚合成一个向量（通常用平均池化或特殊 ",[76,77,78],"span",{},"CLS"," token）。",[54,81,83],{"id":82},"_4-存储-检索","4. 存储 + 检索",[21,85,86],{},"把生成的向量存入向量数据库，查询时把问题也转成向量，找距离最近的。",[16,88,90],{"id":89},"常见-embedding-模型","常见 Embedding 模型",[92,93,94,113],"table",{},[95,96,97],"thead",{},[98,99,100,104,107,110],"tr",{},[101,102,103],"th",{},"模型",[101,105,106],{},"维度",[101,108,109],{},"特点",[101,111,112],{},"价格",[114,115,116,131,145,159,172,185,197],"tbody",{},[98,117,118,122,125,128],{},[119,120,121],"td",{},"OpenAI text-embedding-3-large",[119,123,124],{},"3072",[119,126,127],{},"通用、稳定",[119,129,130],{},"$0.13\u002FM",[98,132,133,136,139,142],{},[119,134,135],{},"OpenAI text-embedding-3-small",[119,137,138],{},"1536",[119,140,141],{},"性价比高",[119,143,144],{},"$0.02\u002FM",[98,146,147,150,153,156],{},[119,148,149],{},"BGE-large-zh-v1.5",[119,151,152],{},"1024",[119,154,155],{},"中文最佳（开源）",[119,157,158],{},"免费",[98,160,161,164,166,169],{},[119,162,163],{},"Jina Embeddings v3",[119,165,152],{},[119,167,168],{},"多语言",[119,170,171],{},"免费（开源）",[98,173,174,177,179,182],{},[119,175,176],{},"Cohere embed v4",[119,178,152],{},[119,180,181],{},"多语言+多模态",[119,183,184],{},"$0.10\u002FM",[98,186,187,190,192,195],{},[119,188,189],{},"GTE-large",[119,191,152],{},[119,193,194],{},"阿里出品（开源）",[119,196,158],{},[98,198,199,202,204,207],{},[119,200,201],{},"voyage-code-3",[119,203,152],{},[119,205,206],{},"代码专用",[119,208,209],{},"$0.18\u002FM",[54,211,212],{"id":212},"维度选择",[214,215,216,223,228],"ul",{},[217,218,219,222],"li",{},[28,220,221],{},"512-768"," — 精度低但速度快，适合大规模粗筛",[217,224,225,227],{},[28,226,152],{}," — 平衡，最常用",[217,229,230,233],{},[28,231,232],{},"1536-3072"," — 高精度，适合对召回质量要求高的场景",[21,235,236,239,240,243],{},[47,237,238],{},"text-embedding-3"," 系列支持 ",[28,241,242],{},"Matryoshka","：训练时就让前 N 维独立可用，需要时直接截断到 256 \u002F 512 \u002F 1024，不用重新 embed。省存储省检索时间。",[16,245,246],{"id":246},"相似度计算",[21,248,249],{},"两个向量之间的\"距离\"有几种计算方式：",[92,251,252,265],{},[95,253,254],{},[98,255,256,259,262],{},[101,257,258],{},"方法",[101,260,261],{},"说明",[101,263,264],{},"适用",[114,266,267,278,289],{},[98,268,269,272,275],{},[119,270,271],{},"余弦相似度",[119,273,274],{},"最常用，衡量方向相似性",[119,276,277],{},"语义搜索（推荐）",[98,279,280,283,286],{},[119,281,282],{},"欧氏距离",[119,284,285],{},"衡量绝对距离",[119,287,288],{},"图像检索",[98,290,291,294,297],{},[119,292,293],{},"点积",[119,295,296],{},"最快，但受向量长度影响",[119,298,299],{},"大规模检索（需归一化）",[21,301,302,305],{},[28,303,304],{},"推荐用余弦相似度","，它对向量长度不敏感，适合文本语义匹配。",[16,307,309],{"id":308},"ann-索引百万向量怎么秒级查","ANN 索引：百万向量怎么秒级查",[21,311,312],{},"数据量小时（\u003C 10 万）暴力遍历就够。上百万级就必须用 ANN（Approximate Nearest Neighbor）索引：",[92,314,315,327],{},[95,316,317],{},[98,318,319,322,325],{},[101,320,321],{},"索引",[101,323,324],{},"原理",[101,326,264],{},[114,328,329,340,351,362],{},[98,330,331,334,337],{},[119,332,333],{},"HNSW",[119,335,336],{},"多层图结构跳跃查找",[119,338,339],{},"通用首选，召回率最高",[98,341,342,345,348],{},[119,343,344],{},"IVF",[119,346,347],{},"先聚类后局部搜索",[119,349,350],{},"上亿向量，内存敏感",[98,352,353,356,359],{},[119,354,355],{},"IVF-PQ",[119,357,358],{},"IVF + 乘积量化压缩",[119,360,361],{},"超大规模，能省 8-16x 内存",[98,363,364,367,370],{},[119,365,366],{},"Flat",[119,368,369],{},"暴力扫描",[119,371,372],{},"\u003C 10 万向量，要求 100% 召回",[21,374,375],{},"经验值：100 万 1024 维向量，HNSW 单机能做到 \u003C10ms 查询；上亿规模建议 IVF-PQ + 多机分片。",[16,377,379],{"id":378},"chunk-size-的取舍","Chunk Size 的取舍",[21,381,382],{},"文档分块大小直接决定 RAG 质量，没有银弹，但有经验区间：",[92,384,385,401],{},[95,386,387],{},[98,388,389,392,395,398],{},[101,390,391],{},"chunk size",[101,393,394],{},"优点",[101,396,397],{},"缺点",[101,399,400],{},"适合",[114,402,403,417,431,445],{},[98,404,405,408,411,414],{},[119,406,407],{},"128-256 token",[119,409,410],{},"检索精度高",[119,412,413],{},"上下文不完整",[119,415,416],{},"FAQ、短问答",[98,418,419,422,425,428],{},[119,420,421],{},"512 token",[119,423,424],{},"平衡，最常用",[119,426,427],{},"—",[119,429,430],{},"通用文档",[98,432,433,436,439,442],{},[119,434,435],{},"1024 token",[119,437,438],{},"上下文完整",[119,440,441],{},"检索精度下降",[119,443,444],{},"长文档、技术手册",[98,446,447,450,453,456],{},[119,448,449],{},"整章\u002F整页",[119,451,452],{},"语义完整",[119,454,455],{},"噪音多、贵",[119,457,458],{},"Late Chunking 场景",[21,460,461,464],{},[28,462,463],{},"重叠（overlap）一般设 chunk size 的 10-20%","——避免关键句被切到两个 chunk 边界都丢失。",[54,466,468],{"id":467},"late-chunking","Late Chunking",[21,470,471,472,475],{},"新思路：先 embedding 整篇文档，",[28,473,474],{},"然后再切","。让每个 chunk 的向量保留全文上下文。Jina v3 等模型原生支持，对长文档效果显著优于先切后 embed。",[16,477,479],{"id":478},"混合检索bm25-向量","混合检索：BM25 + 向量",[21,481,482,483,486],{},"纯向量检索有死角——人名、型号、错别字、罕见专业术语，关键词匹配往往更准。生产 RAG 几乎都用",[28,484,485],{},"混合检索","：",[40,488,491],{"className":489,"code":490,"language":45},[43],"query\n  ├→ BM25 检索       → 候选集 A（关键词命中）\n  ├→ Vector 检索     → 候选集 B（语义命中）\n  ↓\n  RRF \u002F 加权融合     → top-K 候选\n  ↓\n  Reranker 二阶段排序 → 最终 top-N\n  ↓\n  喂给 LLM\n",[47,492,490],{"__ignoreMap":49},[21,494,495],{},"经验值：BM25 和向量按 0.5 \u002F 0.5 加权融合就能比纯向量提升 5-15% 召回率。",[16,497,499],{"id":498},"reranker二阶段排序","Reranker：二阶段排序",[21,501,502,503,506],{},"向量检索快但粗。",[28,504,505],{},"Reranker（Cross-Encoder）"," 慢但准——把 query 和每个候选一起输入模型，输出相关性分数。流程：",[508,509,510,513,516],"ol",{},[217,511,512],{},"向量检索召回 top-100",[217,514,515],{},"Reranker 重排 top-100 → top-10",[217,517,518],{},"top-10 喂给 LLM",[21,520,521,522,525],{},"主流 Reranker：BGE-reranker-v2、Cohere Rerank 3、Jina Reranker v2。",[28,523,524],{},"Reranker 是 RAG 最高 ROI 的优化点之一","，加一步通常能让最终答案质量提升 10-20%。",[16,527,528],{"id":528},"应用场景",[54,530,532],{"id":531},"_1-rag检索增强生成","1. RAG（检索增强生成）",[21,534,535],{},"RAG 的核心步骤就是 Embedding：",[508,537,538,541,544],{},[217,539,540],{},"把知识库文档全部 Embedding → 存入向量数据库",[217,542,543],{},"用户提问 → Embedding → 在向量数据库找最相似的文档片段",[217,545,546],{},"把文档片段 + 问题一起发给 LLM → 生成回答",[54,548,550],{"id":549},"_2-语义搜索","2. 语义搜索",[21,552,553],{},"传统搜索靠关键词匹配，搜\"手机\"找不到\"智能手机\"。语义搜索用 Embedding，搜\"手机\"能找到\"移动通信设备\"。",[54,555,557],{"id":556},"_3-推荐系统","3. 推荐系统",[21,559,560],{},"把用户行为和内容都 Embedding，推荐与用户向量最近的内容。",[54,562,564],{"id":563},"_4-去重与聚类","4. 去重与聚类",[21,566,567],{},"把文档 Embedding 后聚类，相似文档自动归为一类。用于：",[214,569,570,573,576],{},[217,571,572],{},"新闻去重",[217,574,575],{},"文档分类",[217,577,578],{},"知识图谱构建",[16,580,581],{"id":581},"向量数据库",[92,583,584,596],{},[95,585,586],{},[98,587,588,591,593],{},[101,589,590],{},"数据库",[101,592,109],{},[101,594,595],{},"适用场景",[114,597,598,609,620,631,642,653,664],{},[98,599,600,603,606],{},[119,601,602],{},"Chroma",[119,604,605],{},"轻量、Python 原生",[119,607,608],{},"原型开发",[98,610,611,614,617],{},[119,612,613],{},"Qdrant",[119,615,616],{},"Rust 高性能、支持过滤",[119,618,619],{},"生产环境",[98,621,622,625,628],{},[119,623,624],{},"Milvus",[119,626,627],{},"分布式、亿级向量",[119,629,630],{},"企业级",[98,632,633,636,639],{},[119,634,635],{},"Pinecone",[119,637,638],{},"托管 SaaS、免运维",[119,640,641],{},"快速上线",[98,643,644,647,650],{},[119,645,646],{},"pgvector",[119,648,649],{},"PostgreSQL 扩展",[119,651,652],{},"已有 PG 的项目",[98,654,655,658,661],{},[119,656,657],{},"Weaviate",[119,659,660],{},"内置多模态",[119,662,663],{},"多模态搜索",[98,665,666,669,672],{},[119,667,668],{},"libsql \u002F sqlite-vec",[119,670,671],{},"SQLite 扩展",[119,673,674],{},"嵌入式 \u002F 边缘部署",[16,676,678],{"id":677},"批量-embedding-的成本优化","批量 Embedding 的成本优化",[21,680,681],{},"百万级文档全量 embed 一次很贵。三招省钱：",[508,683,684,690,696],{},[217,685,686,689],{},[28,687,688],{},"批量 API","——OpenAI \u002F Cohere 都有 batch API，价格直接打五折，24 小时内出结果。",[217,691,692,695],{},[28,693,694],{},"本地开源模型","——BGE、GTE 在一张 4090 上跑 100 万段文本只要几小时，电费可忽略。",[217,697,698,701],{},[28,699,700],{},"增量更新","——文档没变就别重新 embed，加 hash 去重。",[16,703,704],{"id":704},"常见问题",[54,706,708],{"id":707},"q-embedding-维度越高越好吗","Q: Embedding 维度越高越好吗",[21,710,711],{},"不是。高维度带来更高精度，但也带来更大存储和更慢检索。768-1536 维对大多数场景够用。",[54,713,715],{"id":714},"q-不同语言的-embedding-能互相对比吗","Q: 不同语言的 Embedding 能互相对比吗",[21,717,718],{},"可以，前提是用了多语言 Embedding 模型（如 BGE-m3、Jina v3）。这样中文\"猫\"和英文\"cat\"的向量距离会很近。",[54,720,722],{"id":721},"q-embedding-能理解代码吗","Q: Embedding 能理解代码吗",[21,724,725,726,729],{},"专门的代码 Embedding 模型（如 voyage-code-3、jina-embeddings-v2-code）可以。通用 Embedding 模型对代码的语义理解有限——会把所有 ",[47,727,728],{},"import os"," 都判定为高度相似。",[54,731,733],{"id":732},"q-换-embedding-模型要重新跑全库吗","Q: 换 Embedding 模型要重新跑全库吗",[21,735,736],{},"是。向量空间不通用，不同模型生成的向量不能混用。这也是为什么生产环境上选模型要慎重——重新 embed 一次百万级文档不便宜。",[16,738,739],{"id":739},"延伸阅读",[214,741,742,750,757],{},[217,743,744,745],{},"应用架构：",[746,747,749],"a",{"href":748},"\u002Fwiki\u002Frag.html","RAG（检索增强生成）",[217,751,752,753],{},"上下文组装：",[746,754,756],{"href":755},"\u002Fwiki\u002Fcontext-engineering.html","Context Engineering",[217,758,759,760],{},"计费基础：",[746,761,763],{"href":762},"\u002Fwiki\u002Ftoken.html","Token",{"title":49,"searchDepth":765,"depth":765,"links":766},3,[767,769,770,776,779,780,781,784,785,786,792,793,794,800],{"id":18,"depth":768,"text":19},2,{"id":34,"depth":768,"text":35},{"id":52,"depth":768,"text":52,"children":771},[772,773,774,775],{"id":56,"depth":765,"text":57},{"id":63,"depth":765,"text":64},{"id":70,"depth":765,"text":71},{"id":82,"depth":765,"text":83},{"id":89,"depth":768,"text":90,"children":777},[778],{"id":212,"depth":765,"text":212},{"id":246,"depth":768,"text":246},{"id":308,"depth":768,"text":309},{"id":378,"depth":768,"text":379,"children":782},[783],{"id":467,"depth":765,"text":468},{"id":478,"depth":768,"text":479},{"id":498,"depth":768,"text":499},{"id":528,"depth":768,"text":528,"children":787},[788,789,790,791],{"id":531,"depth":765,"text":532},{"id":549,"depth":765,"text":550},{"id":556,"depth":765,"text":557},{"id":563,"depth":765,"text":564},{"id":581,"depth":768,"text":581},{"id":677,"depth":768,"text":678},{"id":704,"depth":768,"text":704,"children":795},[796,797,798,799],{"id":707,"depth":765,"text":708},{"id":714,"depth":765,"text":715},{"id":721,"depth":765,"text":722},{"id":732,"depth":765,"text":733},{"id":739,"depth":768,"text":739},"concept","md",{},true,"\u002Fwiki\u002Fembedding","2026-06-21",[808],"gpt-5",[810,811],"agent\u002Fplatform\u002Fdify","agent\u002Fplatform\u002Ffastgpt",{"title":11,"description":49},"embedding","wiki\u002Fembedding","把文本、图片等数据转成高维向量，让机器能通过向量距离衡量语义相似度——RAG、搜索、推荐的基础。",[817,818,819,820],"Embedding","向量","语义搜索","RAG","HQZClqn97SO2Ug4d_0Qvn1QGq4je614hZOOL-XbFnG4",1782316490730]