【代码评审】AI:联网检索
This commit is contained in:
parent
4ea26c7e81
commit
d7567e669c
|
@ -3,9 +3,7 @@ package cn.iocoder.yudao.module.ai.controller.admin.chat.vo.message;
|
|||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.NotEmpty;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
|
||||
@Schema(description = "管理后台 - AI 聊天消息发送 Request VO")
|
||||
@Data
|
||||
|
@ -22,6 +20,8 @@ public class AiChatMessageSendReqVO {
|
|||
@Schema(description = "是否携带上下文", example = "true")
|
||||
private Boolean useContext;
|
||||
|
||||
@Schema(description = "搜索enable", example = "true")
|
||||
// TODO @芋艿:改成 useSearch,保持和 useContext 一个风格
|
||||
@Schema(description = "是否搜索", example = "true")
|
||||
private Boolean searchEnable;
|
||||
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeSegmentService;
|
|||
import cn.iocoder.yudao.module.ai.service.model.AiApiKeyService;
|
||||
import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.WebSearchService;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.WebSearchRespVO;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.AiWebSearchRespVO;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.ai.chat.messages.Message;
|
||||
|
@ -97,9 +97,10 @@ public class AiChatMessageServiceImpl implements AiChatMessageService {
|
|||
List<AiKnowledgeSegmentDO> segmentList = recallSegment(sendReqVO.getContent(), conversation.getKnowledgeId());
|
||||
|
||||
// 3.3 联网搜索内容
|
||||
List<WebSearchRespVO> webSearch = getWebSearch(sendReqVO.getContent(), sendReqVO.getSearchEnable(), 10);
|
||||
// TODO @芋艿:可能要改成前端检索;
|
||||
List<AiWebSearchRespVO> webSearch = getWebSearch(sendReqVO.getContent(), sendReqVO.getSearchEnable(), 10);
|
||||
|
||||
// 3.4 创建 chat 需要的 Prompt
|
||||
// 3.4 创建 Chat 需要的 Prompt
|
||||
Prompt prompt = buildPrompt(conversation, historyMessages, segmentList, model, sendReqVO, webSearch);
|
||||
ChatResponse chatResponse = chatModel.call(prompt);
|
||||
|
||||
|
@ -135,7 +136,7 @@ public class AiChatMessageServiceImpl implements AiChatMessageService {
|
|||
|
||||
// 3.3 联网搜索
|
||||
// todo count 看是否需要放到配置文件
|
||||
List<WebSearchRespVO> webSearch = getWebSearch(sendReqVO.getContent(), sendReqVO.getSearchEnable(), 10);
|
||||
List<AiWebSearchRespVO> webSearch = getWebSearch(sendReqVO.getContent(), sendReqVO.getSearchEnable(), 10);
|
||||
|
||||
// 3.4 构建 Prompt,并进行调用
|
||||
Prompt prompt = buildPrompt(conversation, historyMessages, segmentList, model, sendReqVO, webSearch);
|
||||
|
@ -172,12 +173,12 @@ public class AiChatMessageServiceImpl implements AiChatMessageService {
|
|||
* @param count 查询数量
|
||||
* @return 返回查询结果
|
||||
*/
|
||||
private List<WebSearchRespVO> getWebSearch(String prompt, Boolean searchEnable, int count) {
|
||||
private List<AiWebSearchRespVO> getWebSearch(String prompt, Boolean searchEnable, int count) {
|
||||
if (searchEnable != null && searchEnable) {
|
||||
List<WebSearchRespVO> webSearchRespList = webSearchService.bingSearch(prompt, count);
|
||||
List<AiWebSearchRespVO> webSearchRespList = webSearchService.bingSearch(prompt, count);
|
||||
Map<String, String> webCrawlerRespMap
|
||||
= webSearchService.webCrawler(webSearchRespList.stream().map(WebSearchRespVO::getUrl).toList());
|
||||
for (WebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
= webSearchService.webCrawler(webSearchRespList.stream().map(AiWebSearchRespVO::getUrl).toList());
|
||||
for (AiWebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
if (!webCrawlerRespMap.containsKey(webSearchRespVO.getUrl())) {
|
||||
continue;
|
||||
}
|
||||
|
@ -196,7 +197,7 @@ public class AiChatMessageServiceImpl implements AiChatMessageService {
|
|||
|
||||
private Prompt buildPrompt(AiChatConversationDO conversation, List<AiChatMessageDO> messages,
|
||||
List<AiKnowledgeSegmentDO> segmentList, AiChatModelDO model,
|
||||
AiChatMessageSendReqVO sendReqVO, List<WebSearchRespVO> webSearchRespList) {
|
||||
AiChatMessageSendReqVO sendReqVO, List<AiWebSearchRespVO> webSearchRespList) {
|
||||
// 1. 构建 Prompt Message 列表
|
||||
List<Message> chatMessages = new ArrayList<>();
|
||||
|
||||
|
@ -217,13 +218,14 @@ public class AiChatMessageServiceImpl implements AiChatMessageService {
|
|||
List<AiChatMessageDO> contextMessages = filterContextMessages(messages, conversation, sendReqVO);
|
||||
contextMessages.forEach(message -> chatMessages.add(AiUtils.buildMessage(message.getType(), message.getContent())));
|
||||
// 1.4 user message 新发送消息
|
||||
// TODO @芋艿:处理下 prompt 模版
|
||||
if (sendReqVO.getSearchEnable() != null
|
||||
&& sendReqVO.getSearchEnable() && CollUtil.isNotEmpty(webSearchRespList)) {
|
||||
|
||||
StringBuilder promptBuilder = StrUtil.builder();
|
||||
promptBuilder.append("## 以下是联网搜索内容: \n");
|
||||
int i = 1;
|
||||
for (WebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
for (AiWebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
promptBuilder.append("[内容%s begin]".formatted(i)).append("\n");
|
||||
promptBuilder.append("标题:").append(webSearchRespVO.getTitle()).append("\n");
|
||||
promptBuilder.append("地址:").append(webSearchRespVO.getUrl()).append("\n");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package cn.iocoder.yudao.module.ai.service.websearch;
|
||||
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.WebSearchRespVO;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.AiWebSearchRespVO;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -17,8 +17,8 @@ public interface WebSearchService {
|
|||
* @param count 返回结果数量
|
||||
* @return 搜索结果列表
|
||||
*/
|
||||
List<WebSearchRespVO> bingSearch(String query, Integer count);
|
||||
|
||||
List<AiWebSearchRespVO> bingSearch(String query, Integer count);
|
||||
|
||||
/**
|
||||
* Google 搜索
|
||||
*
|
||||
|
@ -26,7 +26,7 @@ public interface WebSearchService {
|
|||
* @param count 返回结果数量
|
||||
* @return 搜索结果列表
|
||||
*/
|
||||
List<WebSearchRespVO> googleSearch(String query, Integer count);
|
||||
List<AiWebSearchRespVO> googleSearch(String query, Integer count);
|
||||
|
||||
/**
|
||||
* web 爬虫
|
||||
|
|
|
@ -6,7 +6,7 @@ import cn.hutool.http.HttpResponse;
|
|||
import cn.hutool.json.JSONArray;
|
||||
import cn.hutool.json.JSONObject;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.WebSearchRespVO;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.AiWebSearchRespVO;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
@ -46,11 +46,11 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
* @return 搜索结果列表
|
||||
*/
|
||||
@Override
|
||||
public List<WebSearchRespVO> bingSearch(String query, Integer count) {
|
||||
public List<AiWebSearchRespVO> bingSearch(String query, Integer count) {
|
||||
if (query == null || query.isEmpty()) {
|
||||
return CollUtil.newArrayList();
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// 发送请求
|
||||
HttpResponse response = HttpRequest.get(BING_URL)
|
||||
|
@ -60,41 +60,41 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
.form("responseFilter", "Webpages")
|
||||
.form("textFormat", "Raw")
|
||||
.execute();
|
||||
|
||||
|
||||
// 解析响应
|
||||
String body = response.body();
|
||||
JSONObject json = JSONUtil.parseObj(body);
|
||||
|
||||
|
||||
// 处理结果
|
||||
List<WebSearchRespVO> results = new ArrayList<>();
|
||||
List<AiWebSearchRespVO> results = new ArrayList<>();
|
||||
if (json.containsKey("webPages") && json.getJSONObject("webPages").containsKey("value")) {
|
||||
JSONArray items = json.getJSONObject("webPages").getJSONArray("value");
|
||||
for (int i = 0; i < items.size(); i++) {
|
||||
JSONObject item = items.getJSONObject(i);
|
||||
WebSearchRespVO result = new WebSearchRespVO()
|
||||
AiWebSearchRespVO result = new AiWebSearchRespVO()
|
||||
.setTitle(item.getStr("name"))
|
||||
.setUrl(item.getStr("url"))
|
||||
.setSnippet(item.getStr("snippet"));
|
||||
results.add(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return results;
|
||||
} catch (Exception e) {
|
||||
log.error("[bingSearch][查询({}) 发生异常]", query, e);
|
||||
return CollUtil.newArrayList();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Google 搜索(使用Serper API)
|
||||
* Google 搜索(使用 Serper API)
|
||||
*
|
||||
* @param query 搜索关键词
|
||||
* @param count 返回结果数量
|
||||
* @return 搜索结果列表
|
||||
*/
|
||||
@Override
|
||||
public List<WebSearchRespVO> googleSearch(String query, Integer count) {
|
||||
public List<AiWebSearchRespVO> googleSearch(String query, Integer count) {
|
||||
if (query == null || query.isEmpty()) {
|
||||
return CollUtil.newArrayList();
|
||||
}
|
||||
|
@ -105,24 +105,24 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
payload.set("q", query);
|
||||
payload.set("gl", "cn");
|
||||
payload.set("num", count);
|
||||
|
||||
|
||||
// 发送请求
|
||||
HttpResponse response = HttpRequest.post(GOOGLE_URL)
|
||||
.header("X-API-KEY", googleApiKey)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(payload.toString())
|
||||
.execute();
|
||||
|
||||
|
||||
// 解析响应
|
||||
String body = response.body();
|
||||
JSONObject json = JSONUtil.parseObj(body);
|
||||
JSONArray organicResults = json.getJSONArray("organic");
|
||||
|
||||
|
||||
// 处理结果
|
||||
List<WebSearchRespVO> results = new ArrayList<>();
|
||||
List<AiWebSearchRespVO> results = new ArrayList<>();
|
||||
for (int i = 0; i < organicResults.size(); i++) {
|
||||
JSONObject item = organicResults.getJSONObject(i);
|
||||
WebSearchRespVO result = new WebSearchRespVO()
|
||||
AiWebSearchRespVO result = new AiWebSearchRespVO()
|
||||
.setTitle(item.getStr("title"))
|
||||
.setUrl(item.getStr("link"))
|
||||
.setSnippet(item.containsKey("snippet") ? item.getStr("snippet") : "");
|
||||
|
@ -146,13 +146,13 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
if (CollUtil.isEmpty(urls)) {
|
||||
return Map.of();
|
||||
}
|
||||
|
||||
|
||||
Map<String, String> result = new HashMap<>();
|
||||
for (String url : urls) {
|
||||
try {
|
||||
// 解析URL以获取域名作为Origin
|
||||
String origin = extractOrigin(url);
|
||||
|
||||
|
||||
// 发送HTTP请求获取网页内容
|
||||
HttpResponse response = HttpRequest.get(url)
|
||||
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
|
@ -163,22 +163,22 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
.header("Cache-Control", "max-age=0")
|
||||
.timeout(10000) // 设置10秒超时
|
||||
.execute();
|
||||
|
||||
|
||||
if (response.isOk()) {
|
||||
String html = response.body();
|
||||
|
||||
|
||||
// 使用Jsoup解析HTML并提取文本内容
|
||||
org.jsoup.nodes.Document doc = org.jsoup.Jsoup.parse(html);
|
||||
|
||||
|
||||
// 移除script和style元素,它们包含的内容不是我们需要的文本
|
||||
doc.select("script, style, meta, link").remove();
|
||||
|
||||
|
||||
// 获取body中的文本内容
|
||||
String text = doc.body().text();
|
||||
|
||||
|
||||
// 清理文本(移除多余空格)
|
||||
text = text.replaceAll("\\s+", " ").trim();
|
||||
|
||||
|
||||
result.put(url, text);
|
||||
} else {
|
||||
log.warn("[webCrawler][URL({}) 请求失败,状态码: {}]", url, response.getStatus());
|
||||
|
@ -189,20 +189,20 @@ public class WebSearchServiceImpl implements WebSearchService {
|
|||
result.put(url, "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 从URL中提取Origin
|
||||
*
|
||||
*
|
||||
* @param url 完整URL
|
||||
* @return Origin (scheme://host[:port])
|
||||
*/
|
||||
private String extractOrigin(String url) {
|
||||
try {
|
||||
java.net.URL parsedUrl = new java.net.URL(url);
|
||||
return parsedUrl.getProtocol() + "://" + parsedUrl.getHost() +
|
||||
return parsedUrl.getProtocol() + "://" + parsedUrl.getHost() +
|
||||
(parsedUrl.getPort() == -1 ? "" : ":" + parsedUrl.getPort());
|
||||
} catch (Exception e) {
|
||||
log.warn("[extractOrigin][URL({}) 解析异常]", url, e);
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
package cn.iocoder.yudao.module.ai.service.websearch.vo;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
|
||||
/**
|
||||
* 搜索结果
|
||||
* AI 搜索结果
|
||||
*/
|
||||
@Data
|
||||
@Accessors(chain = true)
|
||||
public class WebSearchRespVO {
|
||||
public class AiWebSearchRespVO {
|
||||
|
||||
/**
|
||||
* 标题
|
||||
|
@ -26,4 +24,5 @@ public class WebSearchRespVO {
|
|||
* 网站内容
|
||||
*/
|
||||
private String content;
|
||||
}
|
||||
|
||||
}
|
|
@ -1 +1,2 @@
|
|||
// TODO @芋艿:看情况删除
|
||||
package cn.iocoder.yudao.module.ai;
|
|
@ -1,7 +1,7 @@
|
|||
package cn.iocoder.yudao.module.ai.service;
|
||||
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.WebSearchServiceImpl;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.WebSearchRespVO;
|
||||
import cn.iocoder.yudao.module.ai.service.websearch.vo.AiWebSearchRespVO;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -41,14 +41,14 @@ public class WebSearchServiceTests {
|
|||
// 创建网络搜索服务实例
|
||||
WebSearchServiceImpl webSearchService = new WebSearchServiceImpl();
|
||||
// 调用谷歌搜索方法,搜索"长沙今天天气",限制返回6条结果
|
||||
List<WebSearchRespVO> webSearchRespList = webSearchService.googleSearch("长沙今天天气", 6);
|
||||
List<AiWebSearchRespVO> webSearchRespList = webSearchService.googleSearch("长沙今天天气", 6);
|
||||
|
||||
// 从搜索结果中提取URL,并爬取对应网页内容
|
||||
Map<String, String> webCrawlerRespMap
|
||||
= webSearchService.webCrawler(webSearchRespList.stream().map(WebSearchRespVO::getUrl).toList());
|
||||
= webSearchService.webCrawler(webSearchRespList.stream().map(AiWebSearchRespVO::getUrl).toList());
|
||||
|
||||
// 打印搜索结果
|
||||
for (WebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
for (AiWebSearchRespVO webSearchRespVO : webSearchRespList) {
|
||||
System.err.println(JSON.toJSONString(webSearchRespVO));
|
||||
}
|
||||
|
||||
|
|
|
@ -228,7 +228,7 @@ yudao:
|
|||
wxa-subscribe-message:
|
||||
miniprogram-state: developer # 跳转小程序类型:开发版为 “developer”;体验版为 “trial”为;正式版为 “formal”
|
||||
tencent-lbs-key: TVDBZ-TDILD-4ON4B-PFDZA-RNLKH-VVF6E # QQ 地图的密钥 https://lbs.qq.com/service/staticV2/staticGuide/staticDoc
|
||||
web-search:
|
||||
web-search: # TODO 芋艿:key 要不要放到 yudao ai 那去
|
||||
bing-api-key: xx
|
||||
google-api-key: xx
|
||||
justauth:
|
||||
|
|
Loading…
Reference in New Issue