[{"data":1,"prerenderedAt":1416},["ShallowReactive",2],{"blog-/blog/enterprise-rag-guide":3,"blog-related-/blog/enterprise-rag-guide":530},{"id":4,"title":5,"author":6,"body":7,"category":513,"cover":514,"date":515,"description":516,"extension":517,"meta":518,"navigation":519,"path":520,"readingTime":521,"seo":522,"stem":523,"tags":524,"__hash__":529},"blog/blog/enterprise-rag-guide.md","企业知识库 RAG 实战：从文档到 AI 问答的 5 个关键步骤","仙宫云技术团队",{"type":8,"value":9,"toc":490},"minimark",[10,19,24,30,36,103,109,113,116,121,146,150,153,173,177,180,184,187,191,211,215,261,265,272,293,296,300,303,313,318,329,333,336,340,343,363,367,375,379,382,396,400,432,436,439,465,473,476],[11,12,13,14,18],"p",{},"\"我们公司有几万份 Word/PDF 文档，想做一个 AI 问答助手，新员工有问题直接问就能拿答案，可不可行？\"——这是仙宫云客户最高频的需求之一。答案是肯定的，技术路径就是 ",[15,16,17],"strong",{},"RAG（Retrieval-Augmented Generation，检索增强生成）","。本文拆解从 0 到 1 的 5 个关键步骤。",[20,21,23],"h2",{"id":22},"一rag-是什么为什么不直接微调模型","一、RAG 是什么？为什么不直接微调模型？",[11,25,26,29],{},[15,27,28],{},"RAG 的核心思想","：用户提问 → 先从企业文档库检索最相关的几段内容 → 把这些内容作为上下文交给大模型 → 大模型基于上下文生成回答。",[11,31,32,35],{},[15,33,34],{},"对比微调（Fine-tuning）","，RAG 有三个企业级优势：",[37,38,39,55],"table",{},[40,41,42],"thead",{},[43,44,45,49,52],"tr",{},[46,47,48],"th",{},"维度",[46,50,51],{},"RAG",[46,53,54],{},"微调",[56,57,58,70,81,92],"tbody",{},[43,59,60,64,67],{},[61,62,63],"td",{},"知识更新",[61,65,66],{},"改文档即可",[61,68,69],{},"需要重新训练",[43,71,72,75,78],{},[61,73,74],{},"成本",[61,76,77],{},"低（无需 GPU 训练）",[61,79,80],{},"高（数据 + 算力）",[43,82,83,86,89],{},[61,84,85],{},"可追溯",[61,87,88],{},"答案能引用原文",[61,90,91],{},"黑盒输出",[43,93,94,97,100],{},[61,95,96],{},"数据安全",[61,98,99],{},"文档保留在向量库",[61,101,102],{},"知识被吸收进权重",[11,104,105,108],{},[15,106,107],{},"结论","：90% 的企业知识库场景，用 RAG 比微调更合适。",[20,110,112],{"id":111},"二step-1文档预处理最容易被低估的环节","二、Step 1：文档预处理（最容易被低估的环节）",[11,114,115],{},"垃圾进，垃圾出。RAG 效果上限被这一步决定。",[117,118,120],"h3",{"id":119},"_21-文档收集与格式统一","2.1 文档收集与格式统一",[122,123,124,128,131],"ul",{},[125,126,127],"li",{},"收集来源：Word、PDF、PPT、Markdown、Confluence、邮件归档",[125,129,130],{},"统一转 Markdown 或纯文本，保留标题层级",[125,132,133,134,138,139,138,142,145],{},"工具推荐：",[135,136,137],"code",{},"unstructured","、",[135,140,141],{},"Docling",[135,143,144],{},"MinerU","（中文 PDF 表现好）",[117,147,149],{"id":148},"_22-切片chunking策略","2.2 切片（Chunking）策略",[11,151,152],{},"切片大小直接影响检索精度：",[122,154,155,161,167],{},[125,156,157,160],{},[15,158,159],{},"太大","（>1500 字）：检索粒度粗，无关内容多",[125,162,163,166],{},[15,164,165],{},"太小","（\u003C200 字）：上下文不完整，模型无法理解",[125,168,169,172],{},[15,170,171],{},"推荐","：500-800 字 + 50-100 字重叠（overlap）",[117,174,176],{"id":175},"_23-元数据标注","2.3 元数据标注",[11,178,179],{},"每个切片附加元数据：来源文档、章节、更新日期、部门、权限等级。这些字段在检索阶段可以做过滤，比如\"只查财务部 2025 年之后的制度\"。",[20,181,183],{"id":182},"三step-2向量化与向量数据库","三、Step 2：向量化与向量数据库",[11,185,186],{},"把文本切片转成向量，让\"语义相似度\"可以被计算。",[117,188,190],{"id":189},"_31-中文-embedding-模型推荐","3.1 中文 Embedding 模型推荐",[122,192,193,199,205],{},[125,194,195,198],{},[15,196,197],{},"bge-m3","（智源）：多语言、长文本、目前中文综合最佳",[125,200,201,204],{},[15,202,203],{},"text2vec-base-chinese","：轻量，适合资源有限场景",[125,206,207,210],{},[15,208,209],{},"OpenAI text-embedding-3-large","：闭源但效果稳定（数据出域慎用）",[117,212,214],{"id":213},"_32-向量数据库选型","3.2 向量数据库选型",[37,216,217,227],{},[40,218,219],{},[43,220,221,224],{},[46,222,223],{},"数据库",[46,225,226],{},"适用场景",[56,228,229,237,245,253],{},[43,230,231,234],{},[61,232,233],{},"Milvus",[61,235,236],{},"大规模（千万级以上向量），生产首选",[43,238,239,242],{},[61,240,241],{},"Qdrant",[61,243,244],{},"中小规模，部署简单，过滤能力强",[43,246,247,250],{},[61,248,249],{},"Chroma",[61,251,252],{},"POC 验证、小团队",[43,254,255,258],{},[61,256,257],{},"PostgreSQL + pgvector",[61,259,260],{},"已有 PG 基础设施，向量量级 100 万以内",[20,262,264],{"id":263},"四step-3检索策略决定准确率的关键","四、Step 3：检索策略（决定准确率的关键）",[11,266,267,268,271],{},"只用向量相似度（dense retrieval）远远不够。生产级 RAG 一定要做 ",[15,269,270],{},"混合检索","：",[273,274,275,281,287],"ol",{},[125,276,277,280],{},[15,278,279],{},"向量检索","：找语义相似的切片",[125,282,283,286],{},[15,284,285],{},"关键词检索（BM25）","：找精确匹配关键词的切片",[125,288,289,292],{},[15,290,291],{},"重排（Rerank）","：用 bge-reranker 等模型对 Top-20 结果重新打分，取 Top-5",[11,294,295],{},"加上 Rerank 后准确率通常能再提升 15-25%，是性价比最高的优化点。",[20,297,299],{"id":298},"五step-4prompt-设计","五、Step 4：Prompt 设计",[11,301,302],{},"RAG 的 Prompt 模板看似简单，细节决定效果：",[304,305,310],"pre",{"className":306,"code":308,"language":309},[307],"language-text","你是企业知识助手。请严格基于以下\"参考资料\"回答用户问题。\n\n要求：\n1. 答案必须来自参考资料，不要编造\n2. 如果资料中没有相关信息，明确说\"知识库中暂无相关内容\"\n3. 回答末尾标注引用的来源文档\n\n参考资料：\n{retrieved_chunks}\n\n用户问题：{question}\n","text",[135,311,308],{"__ignoreMap":312},"",[11,314,315,271],{},[15,316,317],{},"反幻觉的三个关键约束",[122,319,320,323,326],{},[125,321,322],{},"明确\"必须基于资料\"",[125,324,325],{},"给出\"无答案\"的退出路径",[125,327,328],{},"强制引用来源（用户也能验证）",[20,330,332],{"id":331},"六step-5效果评估与迭代","六、Step 5：效果评估与迭代",[11,334,335],{},"很多企业上线 RAG 后没有评估机制，导致问题积累、用户流失。建议建立三层评估：",[117,337,339],{"id":338},"_61-离线评估","6.1 离线评估",[11,341,342],{},"构建 100-500 条测试问答对，定期跑：",[122,344,345,351,357],{},[125,346,347,350],{},[15,348,349],{},"召回率","：相关切片是否在检索结果 Top-K 中",[125,352,353,356],{},[15,354,355],{},"答案准确率","：人工或大模型评分",[125,358,359,362],{},[15,360,361],{},"拒答率","：无答案问题是否正确拒答",[117,364,366],{"id":365},"_62-在线监控","6.2 在线监控",[122,368,369,372],{},[125,370,371],{},"记录每个问答的：query、检索结果、最终答案、用户反馈（赞/踩）",[125,373,374],{},"重点关注被踩的问答，定位是检索失败还是生成失败",[117,376,378],{"id":377},"_63-持续优化循环","6.3 持续优化循环",[11,380,381],{},"每周/每月迭代一次：",[122,383,384,387,390,393],{},[125,385,386],{},"补充缺失文档",[125,388,389],{},"调整切片策略",[125,391,392],{},"优化 Prompt",[125,394,395],{},"升级 Embedding/Rerank 模型",[20,397,399],{"id":398},"七企业-rag-落地的常见误区","七、企业 RAG 落地的常见误区",[273,401,402,408,414,420,426],{},[125,403,404,407],{},[15,405,406],{},"以为上线就完事","：RAG 是持续运营产品，不是一次性项目",[125,409,410,413],{},[15,411,412],{},"只用单一检索","：纯向量检索准确率上限低",[125,415,416,419],{},[15,417,418],{},"忽略权限控制","：财务文档不能让所有员工查到",[125,421,422,425],{},[15,423,424],{},"没做引用展示","：用户无法验证答案，信任度低",[125,427,428,431],{},[15,429,430],{},"没建反馈闭环","：不知道哪里错、怎么改",[20,433,435],{"id":434},"八仙宫云的企业知识库方案","八、仙宫云的企业知识库方案",[11,437,438],{},"仙宫云提供从大模型私有化部署到 RAG 应用的完整服务：",[122,440,441,447,453,459],{},[125,442,443,446],{},[15,444,445],{},"场景调研","：识别哪些文档值得做、用户高频问题摸底",[125,448,449,452],{},[15,450,451],{},"数据治理","：文档清洗、敏感信息脱敏、权限分级",[125,454,455,458],{},[15,456,457],{},"技术实施","：私有化部署 + Embedding 模型 + 向量库 + 应用界面",[125,460,461,464],{},[15,462,463],{},"持续运营","：评估体系建设、效果迭代、新场景扩展",[11,466,467,472],{},[468,469,471],"a",{"href":470},"/contact","联系我们","获取企业知识库免费方案评估。",[474,475],"hr",{},[11,477,478,271,481,485,486],{},[15,479,480],{},"相关阅读",[468,482,484],{"href":483},"/blog/deepseek-private-deployment-guide","DeepSeek 私有化部署完整指南"," | ",[468,487,489],{"href":488},"/blog/traditional-enterprise-ai-challenges","传统企业 AI 落地的真实困境",{"title":312,"searchDepth":491,"depth":491,"links":492},2,[493,494,500,504,505,506,511,512],{"id":22,"depth":491,"text":23},{"id":111,"depth":491,"text":112,"children":495},[496,498,499],{"id":119,"depth":497,"text":120},3,{"id":148,"depth":497,"text":149},{"id":175,"depth":497,"text":176},{"id":182,"depth":491,"text":183,"children":501},[502,503],{"id":189,"depth":497,"text":190},{"id":213,"depth":497,"text":214},{"id":263,"depth":491,"text":264},{"id":298,"depth":491,"text":299},{"id":331,"depth":491,"text":332,"children":507},[508,509,510],{"id":338,"depth":497,"text":339},{"id":365,"depth":497,"text":366},{"id":377,"depth":497,"text":378},{"id":398,"depth":491,"text":399},{"id":434,"depth":491,"text":435},"AI 应用",null,"2026-04-20","深入讲解企业知识库 RAG（检索增强生成）的落地路径，包含文档预处理、向量化、检索策略、Prompt 设计、效果评估全流程。","md",{},true,"/blog/enterprise-rag-guide",10,{"title":5,"description":516},"blog/enterprise-rag-guide",[51,525,526,527,528],"企业知识库","向量数据库","大模型应用","智能问答","Y0G6dTIxg0ImOVUAy2MlgEoJR7edaGj3_G8mTRjzN5U",[531,921],{"id":532,"title":533,"author":6,"body":534,"category":907,"cover":514,"date":908,"description":909,"extension":517,"meta":910,"navigation":519,"path":488,"readingTime":911,"seo":912,"stem":913,"tags":914,"__hash__":920},"blog/blog/traditional-enterprise-ai-challenges.md","传统企业 AI 落地为什么这么难？三个真实案例分析",{"type":8,"value":535,"toc":883},[536,543,547,550,557,561,564,575,578,584,595,599,602,634,644,648,651,654,657,663,666,677,680,683,712,715,720,724,727,730,733,736,750,753,760,786,789,798,802,806,809,813,816,820,827,831,834,860,866,872,874],[11,537,538,539,542],{},"DeepSeek 火了之后，老板们都在问\"我们怎么用 AI\"。但真正动手做的传统企业里，",[15,540,541],{},"有 70% 的项目在 6 个月内被搁置","。原因不是 AI 不行，而是落地路径选错了。本文用三个真实案例（已脱敏），讲清楚传统企业 AI 落地的难点和正确姿势。",[20,544,546],{"id":545},"案例一某制造集团的ai-客服折戟","案例一：某制造集团的\"AI 客服\"折戟",[117,548,549],{"id":549},"背景",[11,551,552,553,556],{},"一家年产值 50 亿的工业设备制造商，2024 年初决定上 AI。老板的诉求很明确：",[15,554,555],{},"\"我看别人都做 AI 客服，我们也来一套\"","。",[117,558,560],{"id":559},"第一次尝试失败","第一次尝试（失败）",[11,562,563],{},"公司 IT 部门花 30 万买了某 SaaS 厂商的\"通用 AI 客服\"。3 个月后下线，原因：",[122,565,566,569,572],{},[125,567,568],{},"客户问的都是\"3 号轴承能不能配 5 号设备\"这种专业问题",[125,570,571],{},"通用模型完全答不上来，只会说\"建议联系人工客服\"",[125,573,574],{},"客户体验比之前的电话客服还差",[117,576,577],{"id":577},"失败的根本原因",[11,579,580,583],{},[15,581,582],{},"\"AI 客服\"是结果，不是起点。"," 老板只看到别人有 AI 客服，没看到背后需要：",[122,585,586,589,592],{},[125,587,588],{},"完整的产品知识库（这家公司的产品手册散落在 5 个部门的硬盘里）",[125,590,591],{},"历史客服对话数据（之前都用电话，根本没数字化）",[125,593,594],{},"业务规则梳理（哪些问题可以自动回，哪些必须转人工）",[117,596,598],{"id":597},"第二次尝试成功","第二次尝试（成功）",[11,600,601],{},"仙宫云接手后，重新规划路径：",[273,603,604,610,616,622,628],{},[125,605,606,609],{},[15,607,608],{},"第 1 个月","：整理产品手册，建私有知识库",[125,611,612,615],{},[15,613,614],{},"第 2 个月","：部署 DeepSeek-32B + RAG，先给内部销售工程师用",[125,617,618,621],{},[15,619,620],{},"第 3 个月","：收集 500+ 真实问题，迭代 Prompt 和切片策略",[125,623,624,627],{},[15,625,626],{},"第 4 个月","：开放给经销商，验证准确率达 85%",[125,629,630,633],{},[15,631,632],{},"第 6 个月","：上线 C 端客服",[11,635,636,639,640,643],{},[15,637,638],{},"关键洞察","：传统企业上 AI，",[15,641,642],{},"先做内部工具，再做对外应用","。内部用户容忍度高，是 AI 应用最好的 POC 场景。",[20,645,647],{"id":646},"案例二某连锁零售的ai-推荐失灵","案例二：某连锁零售的\"AI 推荐失灵\"",[117,649,549],{"id":650},"背景-1",[11,652,653],{},"300+ 门店连锁餐饮品牌，想做\"AI 个性化菜品推荐\"。第三方乙方报价 80 万，承诺三个月上线。",[117,655,656],{"id":656},"失败点",[11,658,659,660,556],{},"3 个月后系统上线，但经理反馈：",[15,661,662],{},"推荐的菜还不如收银员根据天气和时段拍脑袋的准",[11,664,665],{},"复盘发现：",[122,667,668,671,674],{},[125,669,670],{},"训练数据只有近 6 个月销售记录，没有节假日、天气、促销变量",[125,672,673],{},"\"推荐\"这个动作没有融入门店实际运营流程，店员根本不看",[125,675,676],{},"没有 A/B 测试机制，无法证明\"AI 推荐 vs 人工推荐\"哪个更好",[117,678,679],{"id":679},"改进路径",[11,681,682],{},"仙宫云重新介入，把\"AI 推荐\"拆成三个更小的问题：",[273,684,685,694,703],{},[125,686,687,690,691],{},[15,688,689],{},"新菜上市预测","：基于历史数据预测某门店上新菜的销量，",[15,692,693],{},"辅助采购",[125,695,696,699,700],{},[15,697,698],{},"库存预警","：哪些菜品在哪些门店即将售罄/滞销，",[15,701,702],{},"辅助调拨",[125,704,705,708,709],{},[15,706,707],{},"门店选址洞察","：开新店时基于周边数据生成评估报告，",[15,710,711],{},"辅助决策",[11,713,714],{},"这三个场景都是\"AI 给建议，人做决策\"，门店运营效率提升 18%，年化收益约 2400 万。",[11,716,717,719],{},[15,718,638],{},"：传统行业不要追求\"AI 替代人\"，先做\"AI 辅助决策\"。决策权留给业务人员，反而推广得更顺。",[20,721,723],{"id":722},"案例三某三甲医院的合规死局","案例三：某三甲医院的合规死局",[117,725,549],{"id":726},"背景-2",[11,728,729],{},"某省级三甲医院想做 AI 病历助手，提升医生写病历效率（医生抱怨写病历占 30% 工作时间）。",[117,731,732],{"id":732},"三个月没动起来",[11,734,735],{},"不是技术问题，是数据问题：",[122,737,738,744,747],{},[125,739,740,741],{},"病历是核心医疗数据，根据《医疗机构病历管理规定》和等保三级要求，",[15,742,743],{},"绝对不能上公有云",[125,745,746],{},"院内 IT 团队没有大模型经验",[125,748,749],{},"厂商方案要求开放外网，被信息科一票否决",[117,751,752],{"id":752},"解决方案",[11,754,755,756,759],{},"仙宫云的方案完全围绕\"",[15,757,758],{},"数据不出院","\"设计：",[273,761,762,768,774,780],{},[125,763,764,767],{},[15,765,766],{},"硬件","：在医院信息中心机房部署 2× A100 GPU 服务器",[125,769,770,773],{},[15,771,772],{},"模型","：本地化部署 DeepSeek-R1-Distill-Qwen-32B + 医疗领域微调",[125,775,776,779],{},[15,777,778],{},"应用","：与院内 HIS/EMR 系统对接，医生在熟悉的系统里使用 AI 辅助",[125,781,782,785],{},[15,783,784],{},"合规","：通过院内信息安全审计、审计日志全留痕",[11,787,788],{},"效果：医生病历书写时间减少 50%，3 个月内全院推广。",[11,790,791,793,794,797],{},[15,792,638],{},"：合规要求严格的行业（金融、医疗、政务、能源），",[15,795,796],{},"私有化部署不是可选项，是必选项","。任何方案绕不过这一条。",[20,799,801],{"id":800},"总结传统企业-ai-落地的三条铁律","总结：传统企业 AI 落地的三条铁律",[117,803,805],{"id":804},"_1-不要从我要做-x开始要从我想解决-y开始","1. 不要从\"我要做 X\"开始，要从\"我想解决 Y\"开始",[11,807,808],{},"\"做 AI 客服\"是结果，\"客户咨询响应慢导致流失\"是问题。从问题出发才能选对路径。",[117,810,812],{"id":811},"_2-先内部再外部先辅助再替代","2. 先内部，再外部；先辅助，再替代",[11,814,815],{},"内部工具是最好的 AI 试验田，员工反馈快、容错高。\"AI 替代人\"的项目失败率远高于\"AI 辅助人\"。",[117,817,819],{"id":818},"_3-合规与数据安全是前置条件不是可选项","3. 合规与数据安全是前置条件，不是可选项",[11,821,822,823,826],{},"任何涉及客户/员工/经营数据的 AI 项目，",[15,824,825],{},"先想清楚数据怎么走、合规怎么过","。私有化部署是大多数传统企业的唯一答案。",[20,828,830],{"id":829},"仙宫云的传统企业-ai-落地方法论","仙宫云的传统企业 AI 落地方法论",[11,832,833],{},"我们服务过的 50+ 传统企业，提炼出一套\"四阶段陪跑\"方法：",[273,835,836,842,848,854],{},[125,837,838,841],{},[15,839,840],{},"诊断期（2-4 周）","：业务调研 + AI 高价值场景识别",[125,843,844,847],{},[15,845,846],{},"POC 期（4-8 周）","：选定 1-2 个场景小规模验证",[125,849,850,853],{},[15,851,852],{},"推广期（2-3 个月）","：场景扩展 + 员工培训 + 流程嵌入",[125,855,856,859],{},[15,857,858],{},"运营期（持续）","：效果监控 + 迭代优化 + 新场景挖掘",[11,861,862,865],{},[15,863,864],{},"真正难的不是部署模型，而是把 AI 嵌入业务流程并让员工用起来。"," 这是仙宫云区别于纯技术乙方的核心价值。",[11,867,868,869,871],{},"如果你的企业正在评估 AI 落地路径，欢迎",[468,870,471],{"href":470},"获取免费的场景诊断与可行性评估。",[474,873],{},[11,875,876,271,878,485,880],{},[15,877,480],{},[468,879,484],{"href":483},[468,881,882],{"href":520},"企业知识库 RAG 实战教程",{"title":312,"searchDepth":491,"depth":491,"links":884},[885,891,896,901,906],{"id":545,"depth":491,"text":546,"children":886},[887,888,889,890],{"id":549,"depth":497,"text":549},{"id":559,"depth":497,"text":560},{"id":577,"depth":497,"text":577},{"id":597,"depth":497,"text":598},{"id":646,"depth":491,"text":647,"children":892},[893,894,895],{"id":650,"depth":497,"text":549},{"id":656,"depth":497,"text":656},{"id":679,"depth":497,"text":679},{"id":722,"depth":491,"text":723,"children":897},[898,899,900],{"id":726,"depth":497,"text":549},{"id":732,"depth":497,"text":732},{"id":752,"depth":497,"text":752},{"id":800,"depth":491,"text":801,"children":902},[903,904,905],{"id":804,"depth":497,"text":805},{"id":811,"depth":497,"text":812},{"id":818,"depth":497,"text":819},{"id":829,"depth":491,"text":830},"行业洞察","2026-04-28","从制造、零售、医疗三个真实行业案例，分析传统企业 AI 落地的核心挑战与可行路径，帮助决策者避坑。",{},9,{"title":533,"description":909},"blog/traditional-enterprise-ai-challenges",[915,916,917,918,919],"企业AI落地","AI转型","制造业AI","零售AI","医疗AI","LkS3KZCh7u39y97nZw71AVsAs5WvLsLxPbtn4xjO8wM",{"id":922,"title":923,"author":6,"body":924,"category":1402,"cover":514,"date":1403,"description":1404,"extension":517,"meta":1405,"navigation":519,"path":483,"readingTime":1406,"seo":1407,"stem":1408,"tags":1409,"__hash__":1415},"blog/blog/deepseek-private-deployment-guide.md","DeepSeek 大模型私有化部署完整指南：硬件、成本与避坑要点",{"type":8,"value":925,"toc":1385},[926,933,937,940,966,970,973,1061,1067,1071,1075,1078,1104,1108,1111,1133,1137,1140,1162,1166,1169,1173,1184,1188,1199,1203,1214,1218,1221,1303,1306,1310,1342,1346,1349,1369,1375,1377],[11,927,928,929,932],{},"DeepSeek 在 2024-2025 年成为国内企业大模型私有化部署的首选之一。它开源、中文能力强、推理性能稳定，但真正落地时，企业最常问的三个问题是：",[15,930,931],{},"要什么硬件？花多少钱？怎么避坑？"," 本文给出 2026 年最新的实操答案。",[20,934,936],{"id":935},"一为什么企业要做-deepseek-私有化部署","一、为什么企业要做 DeepSeek 私有化部署？",[11,938,939],{},"调用 API 当然便宜，但当业务涉及以下任一情况，私有化部署几乎是唯一选择：",[122,941,942,948,954,960],{},[125,943,944,947],{},[15,945,946],{},"数据敏感","：客户合同、医疗记录、财务凭证、研发资料这类数据不能出企业内网",[125,949,950,953],{},[15,951,952],{},"合规要求","：等保三级、金融监管、医疗行业合规，明确要求数据本地化",[125,955,956,959],{},[15,957,958],{},"成本临界点","：当 API 月调用量超过 5000 万 tokens，自建反而更便宜",[125,961,962,965],{},[15,963,964],{},"稳定性要求","：业务系统强依赖 AI，不能因为外部 API 限流或宕机而中断",[20,967,969],{"id":968},"二模型版本怎么选","二、模型版本怎么选？",[11,971,972],{},"DeepSeek 官方目前主要开源以下几个版本，企业可根据预算和场景选择：",[37,974,975,990],{},[40,976,977],{},[43,978,979,981,984,987],{},[46,980,772],{},[46,982,983],{},"参数规模",[46,985,986],{},"推荐场景",[46,988,989],{},"最低显存（FP16）",[56,991,992,1006,1020,1034,1048],{},[43,993,994,997,1000,1003],{},[61,995,996],{},"DeepSeek-R1-Distill-Qwen-7B",[61,998,999],{},"7B",[61,1001,1002],{},"客服、简单文档问答",[61,1004,1005],{},"16 GB",[43,1007,1008,1011,1014,1017],{},[61,1009,1010],{},"DeepSeek-R1-Distill-Qwen-14B",[61,1012,1013],{},"14B",[61,1015,1016],{},"知识库 RAG、报告生成",[61,1018,1019],{},"32 GB",[43,1021,1022,1025,1028,1031],{},[61,1023,1024],{},"DeepSeek-R1-Distill-Qwen-32B",[61,1026,1027],{},"32B",[61,1029,1030],{},"复杂推理、合同审阅",[61,1032,1033],{},"64 GB",[43,1035,1036,1039,1042,1045],{},[61,1037,1038],{},"DeepSeek-V3",[61,1040,1041],{},"671B (MoE)",[61,1043,1044],{},"高级 Agent、企业核心场景",[61,1046,1047],{},"8×A100 80G 起",[43,1049,1050,1053,1055,1058],{},[61,1051,1052],{},"DeepSeek-R1",[61,1054,1041],{},[61,1056,1057],{},"复杂推理、深度思考任务",[61,1059,1060],{},"8×H100 80G 起",[11,1062,1063,1066],{},[15,1064,1065],{},"经验法则","：90% 的企业内部场景（客服、知识库、文档处理）用 14B-32B 蒸馏版就够了，不要一上来就追 671B 满血版，硬件成本会翻 10 倍以上。",[20,1068,1070],{"id":1069},"三硬件配置参考2026-年价格","三、硬件配置参考（2026 年价格）",[117,1072,1074],{"id":1073},"入门级7b-14b-模型","入门级（7B-14B 模型）",[11,1076,1077],{},"适合 30-50 人小团队、单一业务场景。",[122,1079,1080,1086,1092,1098],{},[125,1081,1082,1085],{},[15,1083,1084],{},"GPU","：1× RTX 4090（24GB）或 1× RTX A6000（48GB）",[125,1087,1088,1091],{},[15,1089,1090],{},"CPU/内存","：32 核 / 128 GB",[125,1093,1094,1097],{},[15,1095,1096],{},"存储","：2TB NVMe SSD",[125,1099,1100,1103],{},[15,1101,1102],{},"整机预算","：6-15 万元",[117,1105,1107],{"id":1106},"中型32b-模型","中型（32B 模型）",[11,1109,1110],{},"适合 100-500 人企业、多场景并发。",[122,1112,1113,1118,1123,1128],{},[125,1114,1115,1117],{},[15,1116,1084],{},"：2× A100 80G 或 4× RTX 4090",[125,1119,1120,1122],{},[15,1121,1090],{},"：64 核 / 256 GB",[125,1124,1125,1127],{},[15,1126,1096],{},"：4TB NVMe SSD",[125,1129,1130,1132],{},[15,1131,1102],{},"：35-60 万元",[117,1134,1136],{"id":1135},"旗舰级deepseek-v3r1-满血版","旗舰级（DeepSeek-V3/R1 满血版）",[11,1138,1139],{},"适合大型集团、高并发核心业务。",[122,1141,1142,1147,1152,1157],{},[125,1143,1144,1146],{},[15,1145,1084],{},"：8× H100 80G 或 8× A100 80G（NVLink 互联）",[125,1148,1149,1151],{},[15,1150,1090],{},"：128 核 / 1TB",[125,1153,1154,1156],{},[15,1155,1096],{},"：10TB+ NVMe SSD",[125,1158,1159,1161],{},[15,1160,1102],{},"：200-400 万元",[20,1163,1165],{"id":1164},"四推理框架怎么选","四、推理框架怎么选？",[11,1167,1168],{},"部署框架直接影响吞吐量和响应延迟。三个主流选择：",[117,1170,1172],{"id":1171},"_1-vllm生产首选","1. vLLM（生产首选）",[122,1174,1175,1178,1181],{},[125,1176,1177],{},"优点：吞吐量高、支持 PagedAttention、连续批处理",[125,1179,1180],{},"缺点：配置稍复杂",[125,1182,1183],{},"适用：生产环境、高并发场景",[117,1185,1187],{"id":1186},"_2-ollama最简单","2. Ollama（最简单）",[122,1189,1190,1193,1196],{},[125,1191,1192],{},"优点：一行命令启动、支持量化模型",[125,1194,1195],{},"缺点：单机性能有限，不适合高并发",[125,1197,1198],{},"适用：POC 验证、小团队内部使用",[117,1200,1202],{"id":1201},"_3-sglang前沿","3. SGLang（前沿）",[122,1204,1205,1208,1211],{},[125,1206,1207],{},"优点：结构化生成快，工具调用场景表现好",[125,1209,1210],{},"缺点：生态相对新",[125,1212,1213],{},"适用：Agent 应用、复杂推理",[20,1215,1217],{"id":1216},"五典型企业部署成本拆解","五、典型企业部署成本拆解",[11,1219,1220],{},"以一个 200 人制造企业部署 DeepSeek-R1-Distill-Qwen-32B 为例：",[37,1222,1223,1236],{},[40,1224,1225],{},[43,1226,1227,1230,1233],{},[46,1228,1229],{},"项目",[46,1231,1232],{},"一次性",[46,1234,1235],{},"年化",[56,1237,1238,1249,1259,1269,1279,1289],{},[43,1239,1240,1243,1246],{},[61,1241,1242],{},"硬件采购（2× A100）",[61,1244,1245],{},"45 万",[61,1247,1248],{},"-",[43,1250,1251,1254,1257],{},[61,1252,1253],{},"机房环境改造",[61,1255,1256],{},"5 万",[61,1258,1248],{},[43,1260,1261,1264,1267],{},[61,1262,1263],{},"部署实施服务",[61,1265,1266],{},"8-15 万",[61,1268,1248],{},[43,1270,1271,1274,1276],{},[61,1272,1273],{},"电费（24/7 运行）",[61,1275,1248],{},[61,1277,1278],{},"3-5 万",[43,1280,1281,1284,1286],{},[61,1282,1283],{},"运维与模型更新",[61,1285,1248],{},[61,1287,1288],{},"6-12 万",[43,1290,1291,1296,1301],{},[61,1292,1293],{},[15,1294,1295],{},"三年总成本",[61,1297,1298],{},[15,1299,1300],{},"约 75-90 万",[61,1302,1248],{},[11,1304,1305],{},"对照 API 方案：同样规模业务调用，按 0.001 元/千 tokens 估算，三年通常在 30-150 万之间——但数据出域、不可控、长期议价权弱。",[20,1307,1309],{"id":1308},"六企业落地最容易踩的-5-个坑","六、企业落地最容易踩的 5 个坑",[273,1311,1312,1318,1324,1330,1336],{},[125,1313,1314,1317],{},[15,1315,1316],{},"追求满血版","：90% 场景蒸馏版足够，盲目上 671B 浪费硬件",[125,1319,1320,1323],{},[15,1321,1322],{},"忽视吞吐量测试","：部署完才发现并发 10 人就卡，前期没做压测",[125,1325,1326,1329],{},[15,1327,1328],{},"没做模型评估","：直接选最火的，没用自家业务数据测准确率",[125,1331,1332,1335],{},[15,1333,1334],{},"忽略 RAG 配套","：模型部署完没接知识库，用户体验和直接调 API 没区别",[125,1337,1338,1341],{},[15,1339,1340],{},"缺乏运维计划","：模型发版迭代、显卡故障处理、效果回归没人管",[20,1343,1345],{"id":1344},"七仙宫云的部署服务","七、仙宫云的部署服务",[11,1347,1348],{},"仙宫云已为多家制造、零售、医疗、金融企业完成 DeepSeek 私有化部署，提供：",[122,1350,1351,1357,1363],{},[125,1352,1353,1356],{},[15,1354,1355],{},"部署前","：业务场景评估、模型选型、硬件方案、ROI 测算",[125,1358,1359,1362],{},[15,1360,1361],{},"部署中","：硬件部署、模型推理优化、RAG 知识库集成、应用对接",[125,1364,1365,1368],{},[15,1366,1367],{},"部署后","：员工培训、效果监控、模型版本升级、长期陪跑",[11,1370,1371,1372,1374],{},"如果你正在评估 DeepSeek 私有化部署，欢迎",[468,1373,471],{"href":470},"获取免费方案评估。",[474,1376],{},[11,1378,1379,271,1381,485,1383],{},[15,1380,480],{},[468,1382,882],{"href":520},[468,1384,489],{"href":488},{"title":312,"searchDepth":491,"depth":491,"links":1386},[1387,1388,1389,1394,1399,1400,1401],{"id":935,"depth":491,"text":936},{"id":968,"depth":491,"text":969},{"id":1069,"depth":491,"text":1070,"children":1390},[1391,1392,1393],{"id":1073,"depth":497,"text":1074},{"id":1106,"depth":497,"text":1107},{"id":1135,"depth":497,"text":1136},{"id":1164,"depth":491,"text":1165,"children":1395},[1396,1397,1398],{"id":1171,"depth":497,"text":1172},{"id":1186,"depth":497,"text":1187},{"id":1201,"depth":497,"text":1202},{"id":1216,"depth":491,"text":1217},{"id":1308,"depth":491,"text":1309},{"id":1344,"depth":491,"text":1345},"私有化部署","2026-04-12","一篇文章看懂 DeepSeek-R1/V3 私有化部署所需的硬件、显存、推理框架选择、典型成本区间与企业落地常见坑，2026 年最新版。",{},12,{"title":923,"description":1404},"blog/deepseek-private-deployment-guide",[1410,1411,1412,1413,1414],"DeepSeek","大模型私有化部署","本地化部署","vLLM","Ollama","CnUfK8LxNz_IpO364Os_X0FVfj8Hvm3vAdpqq6ePD7A",1778068159125]