From 7918ba7d29698a39dc0141e20284d4946d13fa9a Mon Sep 17 00:00:00 2001 From: YunaiV Date: Fri, 28 Feb 2025 08:16:44 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E5=8A=9F=E8=83=BD=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E3=80=91AI=EF=BC=9A=E6=96=B0=E5=A2=9E=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93=E6=96=87=E6=A1=A3=E7=9A=84=E6=89=B9=E9=87=8F=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../module/ai/enums/ErrorCodeConstants.java | 1 + .../AiKnowledgeDocumentController.http | 25 +++++++- .../AiKnowledgeDocumentController.java | 15 ++++- .../AiKnowledgeDocumentCreateListReqVO.java | 42 +++++++++++++ .../knowledge/AiKnowledgeDocumentService.java | 11 ++++ .../AiKnowledgeDocumentServiceImpl.java | 60 +++++++++++++++---- .../knowledge/AiKnowledgeSegmentService.java | 12 ++++ .../AiKnowledgeSegmentServiceImpl.java | 6 +- 8 files changed, 154 insertions(+), 18 deletions(-) create mode 100644 yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateListReqVO.java diff --git a/yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/ErrorCodeConstants.java b/yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/ErrorCodeConstants.java index 48913e91fa..5ac6d2ae9e 100644 --- a/yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/ErrorCodeConstants.java +++ b/yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/ErrorCodeConstants.java @@ -56,6 +56,7 @@ public interface ErrorCodeConstants { ErrorCode KNOWLEDGE_DOCUMENT_NOT_EXISTS = new ErrorCode(1_022_008_101, "文档不存在!"); ErrorCode KNOWLEDGE_DOCUMENT_FILE_EMPTY = new ErrorCode(1_022_008_102, "文档内容为空!"); + ErrorCode KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL = new ErrorCode(1_022_008_102, "文件下载失败!"); ErrorCode KNOWLEDGE_DOCUMENT_FILE_READ_FAIL = new ErrorCode(1_022_008_102, "文档加载失败!"); ErrorCode KNOWLEDGE_SEGMENT_NOT_EXISTS = new ErrorCode(1_022_008_202, "段落不存在!"); diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.http b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.http index 7af4780c3c..22c1d91115 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.http +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.http @@ -9,4 +9,27 @@ tenant-id: {{adminTenantId}} "name": "测试文档", "url": "https://static.iocoder.cn/README.md", "segmentMaxTokens": 800 -} \ No newline at end of file +} + +### 批量创建知识文档 +POST {{baseUrl}}/ai/knowledge/document/create-list +Content-Type: application/json +Authorization: Bearer {{token}} +tenant-id: {{adminTenantId}} + +{ + "knowledgeId": 1, + "list": [ + { + "name": "测试文档1", + "url": "https://static.iocoder.cn/README.md", + "segmentMaxTokens": 800 + }, + { + "name": "测试文档2", + "url": "https://static.iocoder.cn/README_yudao.md", + "segmentMaxTokens": 400 + } + ] +} + diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.java index cd6feb3056..8d61d0c1e4 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.java @@ -6,6 +6,7 @@ import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentRespVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO; +import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeDocumentService; @@ -16,6 +17,8 @@ import jakarta.validation.Valid; import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.*; +import java.util.List; + import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success; @Tag(name = "管理后台 - AI 知识库文档") @@ -38,8 +41,16 @@ public class AiKnowledgeDocumentController { @PostMapping("/create") @Operation(summary = "新建文档") public CommonResult createKnowledgeDocument(@RequestBody @Valid AiKnowledgeDocumentCreateReqVO reqVO) { - Long knowledgeDocumentId = documentService.createKnowledgeDocument(reqVO); - return success(knowledgeDocumentId); + Long id = documentService.createKnowledgeDocument(reqVO); + return success(id); + } + + @PostMapping("/create-list") + @Operation(summary = "批量新建文档") + public CommonResult> createKnowledgeDocumentList( + @RequestBody @Valid AiKnowledgeDocumentCreateListReqVO reqVO) { + List ids = documentService.createKnowledgeDocumentList(reqVO); + return success(ids); } @PutMapping("/update") diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateListReqVO.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateListReqVO.java new file mode 100644 index 0000000000..4bd817aa4c --- /dev/null +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateListReqVO.java @@ -0,0 +1,42 @@ +package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge; + +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; +import lombok.Data; +import org.hibernate.validator.constraints.URL; + +import java.util.List; + +@Schema(description = "管理后台 - AI 知识库文档批量创建 Request VO") +@Data +public class AiKnowledgeDocumentCreateListReqVO { + + @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204") + @NotNull(message = "知识库编号不能为空") + private Long knowledgeId; + + @Schema(description = "文档列表", requiredMode = Schema.RequiredMode.REQUIRED) + @NotEmpty(message = "文档列表不能为空") + private List list; + + @Schema(description = "文档") + @Data + public static class Document { + + @Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆") + @NotBlank(message = "文档名称不能为空") + private String name; + + @Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn") + @URL(message = "文档 URL 格式不正确") + private String url; + + @Schema(description = "分段的最大 Token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800") + @NotNull(message = "分段的最大 Token 数不能为空") + private Integer segmentMaxTokens; + + } + +} \ No newline at end of file diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java index 9cb7c5a827..41fa41527d 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java @@ -4,8 +4,11 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO; +import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; +import java.util.List; + /** * AI 知识库-文档 Service 接口 * @@ -21,6 +24,14 @@ public interface AiKnowledgeDocumentService { */ Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO); + /** + * 批量创建文档 + * + * @param createListReqVO 批量创建 Request VO + * @return 文档编号列表 + */ + List createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO); + /** * 获取文档分页 * diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java index 0f48d5a992..fd45b97062 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java @@ -8,6 +8,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO; +import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper; @@ -21,9 +22,11 @@ import org.springframework.core.io.ByteArrayResource; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.util.ArrayList; import java.util.List; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; +import static cn.iocoder.yudao.framework.common.util.collection.CollectionUtils.convertList; import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.*; /** @@ -54,25 +57,45 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId()); // 2. 下载文档 - TikaDocumentReader loader = new TikaDocumentReader(downloadFile(createReqVO.getUrl())); - List documents = loader.get(); - Document document = CollUtil.getFirst(documents); - if (document == null || StrUtil.isEmpty(document.getText())) { - throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL); - } + String content = readUrl(createReqVO.getUrl()); // 3. 文档记录入库 - String content = document.getText(); AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class) .setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content)) .setStatus(CommonStatusEnum.ENABLE.getStatus()); knowledgeDocumentMapper.insert(documentDO); - // 4. 文档切片入库 - knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), document.getText()); + // 4. 文档切片入库(同步) + knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), content); return documentDO.getId(); } + @Override + @Transactional(rollbackFor = Exception.class) + public List createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO) { + // 1. 校验参数 + knowledgeService.validateKnowledgeExists(createListReqVO.getKnowledgeId()); + + // 2. 下载文档 + List contents = convertList(createListReqVO.getList(), document -> readUrl(document.getUrl())); + + // 3. 文档记录入库 + List documentDOs = new ArrayList<>(createListReqVO.getList().size()); + for (int i = 0; i < createListReqVO.getList().size(); i++) { + AiKnowledgeDocumentCreateListReqVO.Document documentVO = createListReqVO.getList().get(i); + String content = contents.get(i); + documentDOs.add(BeanUtils.toBean(documentVO, AiKnowledgeDocumentDO.class).setKnowledgeId(createListReqVO.getKnowledgeId()) + .setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content)) + .setStatus(CommonStatusEnum.ENABLE.getStatus())); + } + knowledgeDocumentMapper.insertBatch(documentDOs); + + // 4. 批量创建文档切片(异步) + documentDOs.forEach(documentDO -> + knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(documentDO.getId(), documentDO.getContent())); + return convertList(documentDOs, AiKnowledgeDocumentDO::getId); + } + @Override public PageResult getKnowledgeDocumentPage(AiKnowledgeDocumentPageReqVO pageReqVO) { return knowledgeDocumentMapper.selectPage(pageReqVO); @@ -97,17 +120,28 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic return knowledgeDocument; } - private org.springframework.core.io.Resource downloadFile(String url) { + private static String readUrl(String url) { + // 下载文件 + ByteArrayResource resource = null; try { byte[] bytes = HttpUtil.downloadBytes(url); if (bytes.length == 0) { throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY); } - return new ByteArrayResource(bytes); + resource = new ByteArrayResource(bytes); } catch (Exception e) { - log.error("[downloadFile][url({}) 下载失败]", url, e); - throw new RuntimeException(e); + log.error("[readUrl][url({}) 读取失败]", url, e); + throw exception(KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL); } + + // 读取文件 + TikaDocumentReader loader = new TikaDocumentReader(resource); + List documents = loader.get(); + Document document = CollUtil.getFirst(documents); + if (document == null || StrUtil.isEmpty(document.getText())) { + throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL); + } + return document.getText(); } } diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java index 2a8d9afd87..064d373b73 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java @@ -6,6 +6,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowle import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO; +import org.springframework.scheduling.annotation.Async; import java.util.List; @@ -32,6 +33,17 @@ public interface AiKnowledgeSegmentService { */ void createKnowledgeSegmentBySplitContent(Long documentId, String content); + /** + * 【异步】基于 content 内容,切片创建多个段落 + * + * @param documentId 知识库文档编号 + * @param content 文档内容 + */ + @Async + default void createKnowledgeSegmentBySplitContentAsync(Long documentId, String content) { + createKnowledgeSegmentBySplitContent(documentId, content); + } + /** * 更新段落的内容 * diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java index 9b15135c95..615299b2db 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java @@ -110,8 +110,10 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService // 3.1 更新切片 AiKnowledgeSegmentDO segmentDO = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class); segmentMapper.updateById(segmentDO); - // 3.2 重新向量化 - writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent())); + // 3.2 重新向量化,必须开启状态 + if (CommonStatusEnum.isEnable(segmentDO.getStatus())) { + writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent())); + } } @Override