作为系统的使用者,我希望在用户输入搜索词的过程中,系统能够进行自动补全并给出搜索建议,协助用户输入更精准的关键词,从而提高后续全文搜索阶段文档匹配的准确度。
实现方案
- 用户刚开始输入的过程中,使用Completion Suggester进行关键词前缀匹配,刚开始匹配项会比较多,随着用户输入字符增多,匹配项越来越少。
- 如果Completion Suggester已经到了零匹配,可能是用户输入错误,尝试Phrase Suggester进行短语推荐。
- 如果Phrase Suggester没有返回任何option,再尝试Term Suggester进行单词推荐。
- 自动补全字段是通过将查询字段分词后,去重,然后加入到suggest字段中。
自动补全
自动补全DSL示例
GET product_completion_index/_search { "from": 0, "size": 100, "suggest": { "czbk-suggest": { "prefix": "小米", "completion": { "field": "searchkey", "size": 20, "skip_duplicates": true } } } }
自动补全字段数据获取方法
private boolean updateSuggest(HouseIndexTemplate indexTemplate) { //将分词字段加入AnalyzeRequestBuilder,通过ik_smart分词后会生成多个词组,然后将词组加入suggest字段 AnalyzeRequestBuilder requestBuilder = new AnalyzeRequestBuilder( this.esClient, AnalyzeAction.INSTANCE, INDEX_NAME, indexTemplate.getTitle(), indexTemplate.getLayoutDesc(), indexTemplate.getRoundService(), indexTemplate.getDescription(), indexTemplate.getSubwayLineName(), indexTemplate.getSubwayStationName()); //采用ik_smart分词 requestBuilder.setAnalyzer("ik_smart"); AnalyzeResponse response = requestBuilder.get(); List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens(); if (tokens == null) { logger.warn("Can not analyze token for house: " + indexTemplate.getHouseId()); return false; } List<HouseSuggest> suggests = new ArrayList<>(); for (AnalyzeResponse.AnalyzeToken token : tokens) { // 排序数字类型 & 小于2个字符的分词结果 if ("<NUM>".equals(token.getType()) || token.getTerm().length() < 2) { continue; } HouseSuggest suggest = new HouseSuggest(); suggest.setInput(token.getTerm()); suggests.add(suggest); } // 定制化小区自动补全(不需要分词的字段手动额外加入) HouseSuggest suggest = new HouseSuggest(); suggest.setInput(indexTemplate.getDistrict()); suggests.add(suggest); indexTemplate.setSuggest(suggests); return true; }
自动补全代码示例
package com.oldlu.service; import com.oldlu.commons.pojo.CommonEntity; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.suggest.completion.CompletionSuggestion; import java.util.List; import java.util.Map; /** * @Class: ElasticsearchDocumentService * @Package com.oldlu.service * @Description: 文档操作接口 * @Company: http://www.oldlu.com/ */ public interface ElasticsearchDocumentService { //自动补全(完成建议) public List<String> cSuggest(CommonEntity commonEntity) throws Exception; }
Service
/* * @Description: 自动补全 根据用户的输入联想到可能的词或者短语 * @Method: suggester * @Param: [commonEntity] * @Update: * @since: 1.0.0 * @Return: org.elasticsearch.action.search.SearchResponse * */ public List<String> cSuggest(CommonEntity commonEntity) throws Exception { //定义返回 List<String> suggestList = new ArrayList<>(); //构建查询请求 SearchRequest searchRequest = new SearchRequest(commonEntity.getIndexName()); //通过查询构建器定义评分排序 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); //构造搜索建议语句,搜索条件字段 CompletionSuggestionBuilder completionSuggestionBuilder =new CompletionSuggestionBuilder(commonEntity.getSuggestFileld()); //搜索关键字 completionSuggestionBuilder.prefix(commonEntity.getSuggestValue()); //去除重复 completionSuggestionBuilder.skipDuplicates(true); //匹配数量 completionSuggestionBuilder.size(commonEntity.getSuggestCount()); searchSourceBuilder.suggest(new SuggestBuilder().addSuggestion("czbk- suggest", completionSuggestionBuilder)); //czbk-suggest为返回的字段,所有返回将在czbk-suggest里面,可写死,sort按照评分排 序 searchRequest.source(searchSourceBuilder); //定义查找响应 SearchResponse suggestResponse = client.search(searchRequest, RequestOptions.DEFAULT); //定义完成建议对象 CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("czbk-suggest"); List<CompletionSuggestion.Entry.Option> optionsList = completionSuggestion.getEntries().get(0).getOptions(); //从optionsList取出结果 if (!CollectionUtils.isEmpty(optionsList)) { optionsList.forEach(item -> suggestList.add(item.getText().toString())); } return suggestList; }
Controller
/* * @Description 自动补全 * @Method: suggester * @Param: [commonEntity] * @Update: * @since: 1.0.0 * @Return: com.oldlu.commons.result.ResponseData * */ @GetMapping(value = "/csuggest") public ResponseData cSuggest(@RequestBody CommonEntity commonEntity) { // 构造返回数据 ResponseData rData = new ResponseData(); if (StringUtils.isEmpty(commonEntity.getIndexName()) || StringUtils.isEmpty(commonEntity.getSuggestFileld()) || StringUtils.isEmpty(commonEntity.getSuggestValue())) { rData.setResultEnum(ResultEnum.PARAM_ISNULL); return rData; } //批量查询返回结果 List<String> result = null; try { //通过高阶API调用批量新增操作方法 result = elasticsearchDocumentService.cSuggest(commonEntity); //通过类型推断自动装箱(多个参数取交集) rData.setResultEnum(result, ResultEnum.SUCCESS, result.size()); //日志记录 logger.info(TipsEnum.CSUGGEST_GET_DOC_SUCCESS.getMessage()); } catch (Exception e) { //日志记录 logger.error(TipsEnum.CSUGGEST_GET_DOC_FAIL.getMessage(), e); //构建错误返回信息 rData.setResultEnum(ResultEnum.ERROR); } return rData; }
拼写纠错
短语推荐DSL示例
GET product_completion_index/_search { "suggest": { "czbk-suggestion": { "text": "adidaas官方旗舰店", "phrase": { "field": "name", "size": 13 } } } }
拼写纠错代码示例
Service
/* * @Description: 拼写纠错 * @Method: psuggest * @Param: [commonEntity] * @Update: * @since: 1.0.0 * @Return: java.util.List<java.lang.String> * */ @Override public String pSuggest(CommonEntity commonEntity) throws Exception { //定义返回 String pSuggestString = new String(); //定义查询请求 SearchRequest searchRequest = new SearchRequest(commonEntity.getIndexName()); //定义查询条件构建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //定义排序器 searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); //构造短语建议器对象(参数为匹配列) PhraseSuggestionBuilder pSuggestionBuilder = new PhraseSuggestionBuilder(commonEntity.getSuggestFileld()); //搜索关键字(被纠错的值) pSuggestionBuilder.text(commonEntity.getSuggestValue()); //匹配数量 pSuggestionBuilder.size(1); searchSourceBuilder.suggest(new SuggestBuilder().addSuggestion("czbk- suggest", pSuggestionBuilder)); searchRequest.source(searchSourceBuilder); //定义查找响应 SearchResponse suggestResponse = client.search(searchRequest, RequestOptions.DEFAULT); //定义短语建议对象 PhraseSuggestion phraseSuggestion = suggestResponse.getSuggest().getSuggestion("czbk-suggest"); //获取返回数据 List<PhraseSuggestion.Entry.Option> optionsList = phraseSuggestion.getEntries().get(0).getOptions(); //从optionsList取出结果 if (!CollectionUtils.isEmpty(optionsList) &&optionsList.get(0).getText()!=null) { pSuggestString = optionsList.get(0).getText().string().replaceAll(" ",""); } return pSuggestString; }
Controller
/* * @Description: 拼写纠错 * @Method: suggester2 * @Param: [commonEntity] * @Update: * @since: 1.0.0 * @Return: com.oldlu.commons.result.ResponseData * */ @GetMapping(value = "/psuggest") public ResponseData pSuggest(@RequestBody CommonEntity commonEntity) { // 构造返回数据 ResponseData rData = new ResponseData(); if (StringUtils.isEmpty(commonEntity.getIndexName()) || StringUtils.isEmpty(commonEntity.getSuggestFileld()) || StringUtils.isEmpty(commonEntity.getSuggestValue())) { rData.setResultEnum(ResultEnum.PARAM_ISNULL); return rData; } //批量查询返回结果 String result = null; try { //通过高阶API调用批量新增操作方法 result = elasticsearchDocumentService.pSuggest(commonEntity); //通过类型推断自动装箱(多个参数取交集) rData.setResultEnum(result, ResultEnum.SUCCESS, null); //日志记录 logger.info(TipsEnum.PSUGGEST_GET_DOC_SUCCESS.getMessage()); } catch (Exception e) { //日志记录 logger.error(TipsEnum.PSUGGEST_GET_DOC_FAIL.getMessage(), e); //构建错误返回信息 rData.setResultEnum(ResultEnum.ERROR); } return rData; }标签:搜索,补全,suggest,indexTemplate,自动,commonEntity,rData,new From: https://www.cnblogs.com/libin2015/p/17955833