【功能新增】AI:新增知识库文档的批量添加
This commit is contained in:
parent
0a8c75625a
commit
7918ba7d29
|
@ -56,6 +56,7 @@ public interface ErrorCodeConstants {
|
||||||
|
|
||||||
ErrorCode KNOWLEDGE_DOCUMENT_NOT_EXISTS = new ErrorCode(1_022_008_101, "文档不存在!");
|
ErrorCode KNOWLEDGE_DOCUMENT_NOT_EXISTS = new ErrorCode(1_022_008_101, "文档不存在!");
|
||||||
ErrorCode KNOWLEDGE_DOCUMENT_FILE_EMPTY = new ErrorCode(1_022_008_102, "文档内容为空!");
|
ErrorCode KNOWLEDGE_DOCUMENT_FILE_EMPTY = new ErrorCode(1_022_008_102, "文档内容为空!");
|
||||||
|
// Raised when the remote document cannot be downloaded.
// Uses its own code: 1_022_008_102 is already assigned to KNOWLEDGE_DOCUMENT_FILE_EMPTY, and a shared
// code would make the two failures indistinguishable to API clients.
// NOTE(review): 1_022_008_103 is assumed to be unused - confirm against the full constants file.
ErrorCode KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL = new ErrorCode(1_022_008_103, "文件下载失败!");
|
||||||
ErrorCode KNOWLEDGE_DOCUMENT_FILE_READ_FAIL = new ErrorCode(1_022_008_102, "文档加载失败!");
|
ErrorCode KNOWLEDGE_DOCUMENT_FILE_READ_FAIL = new ErrorCode(1_022_008_102, "文档加载失败!");
|
||||||
|
|
||||||
ErrorCode KNOWLEDGE_SEGMENT_NOT_EXISTS = new ErrorCode(1_022_008_202, "段落不存在!");
|
ErrorCode KNOWLEDGE_SEGMENT_NOT_EXISTS = new ErrorCode(1_022_008_202, "段落不存在!");
|
||||||
|
|
|
@ -10,3 +10,26 @@ tenant-id: {{adminTenantId}}
|
||||||
"url": "https://static.iocoder.cn/README.md",
|
"url": "https://static.iocoder.cn/README.md",
|
||||||
"segmentMaxTokens": 800
|
"segmentMaxTokens": 800
|
||||||
}
|
}
|
||||||
|
|
||||||
|
### 批量创建知识文档
|
||||||
|
POST {{baseUrl}}/ai/knowledge/document/create-list
|
||||||
|
Content-Type: application/json
|
||||||
|
Authorization: Bearer {{token}}
|
||||||
|
tenant-id: {{adminTenantId}}
|
||||||
|
|
||||||
|
{
|
||||||
|
"knowledgeId": 1,
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "测试文档1",
|
||||||
|
"url": "https://static.iocoder.cn/README.md",
|
||||||
|
"segmentMaxTokens": 800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "测试文档2",
|
||||||
|
"url": "https://static.iocoder.cn/README_yudao.md",
|
||||||
|
"segmentMaxTokens": 400
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentRespVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentRespVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
||||||
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
||||||
import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeDocumentService;
|
import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeDocumentService;
|
||||||
|
@ -16,6 +17,8 @@ import jakarta.validation.Valid;
|
||||||
import org.springframework.validation.annotation.Validated;
|
import org.springframework.validation.annotation.Validated;
|
||||||
import org.springframework.web.bind.annotation.*;
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
|
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
|
||||||
|
|
||||||
@Tag(name = "管理后台 - AI 知识库文档")
|
@Tag(name = "管理后台 - AI 知识库文档")
|
||||||
|
@ -38,8 +41,16 @@ public class AiKnowledgeDocumentController {
|
||||||
@PostMapping("/create")
|
@PostMapping("/create")
|
||||||
@Operation(summary = "新建文档")
|
@Operation(summary = "新建文档")
|
||||||
public CommonResult<Long> createKnowledgeDocument(@RequestBody @Valid AiKnowledgeDocumentCreateReqVO reqVO) {
|
public CommonResult<Long> createKnowledgeDocument(@RequestBody @Valid AiKnowledgeDocumentCreateReqVO reqVO) {
|
||||||
Long knowledgeDocumentId = documentService.createKnowledgeDocument(reqVO);
|
Long id = documentService.createKnowledgeDocument(reqVO);
|
||||||
return success(knowledgeDocumentId);
|
return success(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/create-list")
|
||||||
|
@Operation(summary = "批量新建文档")
|
||||||
|
public CommonResult<List<Long>> createKnowledgeDocumentList(
|
||||||
|
@RequestBody @Valid AiKnowledgeDocumentCreateListReqVO reqVO) {
|
||||||
|
List<Long> ids = documentService.createKnowledgeDocumentList(reqVO);
|
||||||
|
return success(ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@PutMapping("/update")
|
@PutMapping("/update")
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge;
|
||||||
|
|
||||||
|
import io.swagger.v3.oas.annotations.media.Schema;
|
||||||
|
import jakarta.validation.constraints.NotBlank;
|
||||||
|
import jakarta.validation.constraints.NotEmpty;
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
import lombok.Data;
|
||||||
|
import org.hibernate.validator.constraints.URL;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Schema(description = "管理后台 - AI 知识库文档批量创建 Request VO")
|
||||||
|
@Data
|
||||||
|
public class AiKnowledgeDocumentCreateListReqVO {
|
||||||
|
|
||||||
|
@Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
|
||||||
|
@NotNull(message = "知识库编号不能为空")
|
||||||
|
private Long knowledgeId;
|
||||||
|
|
||||||
|
@Schema(description = "文档列表", requiredMode = Schema.RequiredMode.REQUIRED)
|
||||||
|
@NotEmpty(message = "文档列表不能为空")
|
||||||
|
private List<Document> list;
|
||||||
|
|
||||||
|
@Schema(description = "文档")
|
||||||
|
@Data
|
||||||
|
public static class Document {
|
||||||
|
|
||||||
|
@Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆")
|
||||||
|
@NotBlank(message = "文档名称不能为空")
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
@Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
|
||||||
|
@URL(message = "文档 URL 格式不正确")
|
||||||
|
private String url;
|
||||||
|
|
||||||
|
@Schema(description = "分段的最大 Token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
|
||||||
|
@NotNull(message = "分段的最大 Token 数不能为空")
|
||||||
|
private Integer segmentMaxTokens;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -4,8 +4,11 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
||||||
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AI 知识库-文档 Service 接口
|
* AI 知识库-文档 Service 接口
|
||||||
*
|
*
|
||||||
|
@ -21,6 +24,14 @@ public interface AiKnowledgeDocumentService {
|
||||||
*/
|
*/
|
||||||
Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO);
|
Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 批量创建文档
|
||||||
|
*
|
||||||
|
* @param createListReqVO 批量创建 Request VO
|
||||||
|
* @return 文档编号列表
|
||||||
|
*/
|
||||||
|
List<Long> createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取文档分页
|
* 获取文档分页
|
||||||
*
|
*
|
||||||
|
|
|
@ -8,6 +8,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||||
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
|
||||||
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
||||||
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
|
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
|
||||||
|
@ -21,9 +22,11 @@ import org.springframework.core.io.ByteArrayResource;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||||
|
import static cn.iocoder.yudao.framework.common.util.collection.CollectionUtils.convertList;
|
||||||
import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.*;
|
import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,25 +57,45 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
||||||
knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
|
knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
|
||||||
|
|
||||||
// 2. 下载文档
|
// 2. 下载文档
|
||||||
TikaDocumentReader loader = new TikaDocumentReader(downloadFile(createReqVO.getUrl()));
|
String content = readUrl(createReqVO.getUrl());
|
||||||
List<Document> documents = loader.get();
|
|
||||||
Document document = CollUtil.getFirst(documents);
|
|
||||||
if (document == null || StrUtil.isEmpty(document.getText())) {
|
|
||||||
throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. 文档记录入库
|
// 3. 文档记录入库
|
||||||
String content = document.getText();
|
|
||||||
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
||||||
.setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
|
.setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
|
||||||
.setStatus(CommonStatusEnum.ENABLE.getStatus());
|
.setStatus(CommonStatusEnum.ENABLE.getStatus());
|
||||||
knowledgeDocumentMapper.insert(documentDO);
|
knowledgeDocumentMapper.insert(documentDO);
|
||||||
|
|
||||||
// 4. 文档切片入库
|
// 4. 文档切片入库(同步)
|
||||||
knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), document.getText());
|
knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), content);
|
||||||
return documentDO.getId();
|
return documentDO.getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@Transactional(rollbackFor = Exception.class)
|
||||||
|
public List<Long> createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO) {
|
||||||
|
// 1. 校验参数
|
||||||
|
knowledgeService.validateKnowledgeExists(createListReqVO.getKnowledgeId());
|
||||||
|
|
||||||
|
// 2. 下载文档
|
||||||
|
List<String> contents = convertList(createListReqVO.getList(), document -> readUrl(document.getUrl()));
|
||||||
|
|
||||||
|
// 3. 文档记录入库
|
||||||
|
List<AiKnowledgeDocumentDO> documentDOs = new ArrayList<>(createListReqVO.getList().size());
|
||||||
|
for (int i = 0; i < createListReqVO.getList().size(); i++) {
|
||||||
|
AiKnowledgeDocumentCreateListReqVO.Document documentVO = createListReqVO.getList().get(i);
|
||||||
|
String content = contents.get(i);
|
||||||
|
documentDOs.add(BeanUtils.toBean(documentVO, AiKnowledgeDocumentDO.class).setKnowledgeId(createListReqVO.getKnowledgeId())
|
||||||
|
.setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
|
||||||
|
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||||
|
}
|
||||||
|
knowledgeDocumentMapper.insertBatch(documentDOs);
|
||||||
|
|
||||||
|
// 4. 批量创建文档切片(异步)
|
||||||
|
documentDOs.forEach(documentDO ->
|
||||||
|
knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(documentDO.getId(), documentDO.getContent()));
|
||||||
|
return convertList(documentDOs, AiKnowledgeDocumentDO::getId);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public PageResult<AiKnowledgeDocumentDO> getKnowledgeDocumentPage(AiKnowledgeDocumentPageReqVO pageReqVO) {
|
public PageResult<AiKnowledgeDocumentDO> getKnowledgeDocumentPage(AiKnowledgeDocumentPageReqVO pageReqVO) {
|
||||||
return knowledgeDocumentMapper.selectPage(pageReqVO);
|
return knowledgeDocumentMapper.selectPage(pageReqVO);
|
||||||
|
@ -97,17 +120,28 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
||||||
return knowledgeDocument;
|
return knowledgeDocument;
|
||||||
}
|
}
|
||||||
|
|
||||||
private org.springframework.core.io.Resource downloadFile(String url) {
|
private static String readUrl(String url) {
|
||||||
|
// 下载文件
|
||||||
|
ByteArrayResource resource = null;
|
||||||
try {
|
try {
|
||||||
byte[] bytes = HttpUtil.downloadBytes(url);
|
byte[] bytes = HttpUtil.downloadBytes(url);
|
||||||
if (bytes.length == 0) {
|
if (bytes.length == 0) {
|
||||||
throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY);
|
throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY);
|
||||||
}
|
}
|
||||||
return new ByteArrayResource(bytes);
|
resource = new ByteArrayResource(bytes);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("[downloadFile][url({}) 下载失败]", url, e);
|
log.error("[readUrl][url({}) 读取失败]", url, e);
|
||||||
throw new RuntimeException(e);
|
throw exception(KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 读取文件
|
||||||
|
TikaDocumentReader loader = new TikaDocumentReader(resource);
|
||||||
|
List<Document> documents = loader.get();
|
||||||
|
Document document = CollUtil.getFirst(documents);
|
||||||
|
if (document == null || StrUtil.isEmpty(document.getText())) {
|
||||||
|
throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL);
|
||||||
|
}
|
||||||
|
return document.getText();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowle
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO;
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO;
|
||||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -32,6 +33,17 @@ public interface AiKnowledgeSegmentService {
|
||||||
*/
|
*/
|
||||||
void createKnowledgeSegmentBySplitContent(Long documentId, String content);
|
void createKnowledgeSegmentBySplitContent(Long documentId, String content);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 【异步】基于 content 内容,切片创建多个段落
|
||||||
|
*
|
||||||
|
* @param documentId 知识库文档编号
|
||||||
|
* @param content 文档内容
|
||||||
|
*/
|
||||||
|
@Async
|
||||||
|
default void createKnowledgeSegmentBySplitContentAsync(Long documentId, String content) {
|
||||||
|
createKnowledgeSegmentBySplitContent(documentId, content);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 更新段落的内容
|
* 更新段落的内容
|
||||||
*
|
*
|
||||||
|
|
|
@ -110,8 +110,10 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
|
||||||
// 3.1 更新切片
|
// 3.1 更新切片
|
||||||
AiKnowledgeSegmentDO segmentDO = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
|
AiKnowledgeSegmentDO segmentDO = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
|
||||||
segmentMapper.updateById(segmentDO);
|
segmentMapper.updateById(segmentDO);
|
||||||
// 3.2 重新向量化
|
// 3.2 重新向量化,必须开启状态
|
||||||
writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent()));
|
if (CommonStatusEnum.isEnable(segmentDO.getStatus())) {
|
||||||
|
writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue