ElasticSearch版本:elasticsearch-7.3.0
環境準備(需先安裝與ES版本對應的IK分詞插件elasticsearch-analysis-ik,下方索引設定使用了ik_smart分詞器):
# Create the `article` index.
# - settings: defines the custom analyzer `shingle_analyzer` (ik_smart tokenizer followed by
#   `shingle_filter`, which is configured with min_shingle_size 2, max_shingle_size 4 and
#   output_unigrams false).
# - mappings: strict dynamic mapping; `id` is excluded from the stored _source; `title` is a text
#   field analyzed with ik_smart and has two sub-fields, `title.raw` (keyword) and `title.shingle`
#   (text analyzed with shingle_analyzer); `publish_time` is a date in "yyyy-MM-dd HH:mm:ss" format.
curl -H "Content-Type: application/json" -XPUT 'http://192.168.0.1:9200/article/' -d '
{
"settings": {
"analysis": {
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter": [
"shingle_filter"
]
}
},
"filter": {
"shingle_filter": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 4,
"output_unigrams": false
}
}
}
},
"mappings": {
"dynamic": "strict",
"_source": {
"excludes": [
"id"
]
},
"properties": {
"id": {
"type": "keyword"
},
"title": {
"analyzer": "ik_smart",
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"shingle": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
},
"publish_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
'
# Index six sample documents (_doc ids 1-6) into `article`. The titles list overlapping
# subsets of the same names in varying order, and the publish times are spaced one hour apart,
# so the queries in the Java test code below have distinguishable matches to rank.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/1' -d '
{
"id": "1",
"title": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏、蔡依林、孫燕姿、梁靜茹一同參加頒獎典禮",
"publish_time": "2019-08-22 17:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/2' -d '
{
"id": "2",
"title": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏一同參加頒獎典禮",
"publish_time": "2019-08-22 16:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/3' -d '
{
"id": "3",
"title": "周杰倫、林俊杰、羅志祥、孫燕姿一同參加頒獎典禮",
"publish_time": "2019-08-22 15:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/4' -d '
{
"id": "4",
"title": "周杰倫、羅志祥、孫燕姿一同參加頒獎典禮",
"publish_time": "2019-08-22 14:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/5' -d '
{
"id": "5",
"title": "周杰倫、林俊杰、梁靜茹一同參加頒獎典禮",
"publish_time": "2019-08-22 13:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/6' -d '
{
"id": "6",
"title": "周杰倫、王力宏、潘瑋柏、林俊杰、羅志祥、孫燕姿一同參加頒獎典禮",
"publish_time": "2019-08-22 12:48:16"
}
'
# Run the index's `shingle_analyzer` over a sample title through the _analyze endpoint
# (pretty-printed) to inspect the tokens it emits.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/article/_analyze?pretty=true' -d '
{
"analyzer": "shingle_analyzer",
"text": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏、蔡依林、孫燕姿、梁靜茹一同參加頒獎典禮"
}
'
測試驗證代碼:
import java.io.IOException;
import java.util.Map;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Demo helper that runs sample relevance queries against the {@code article} index through the
 * Elasticsearch high-level REST client and logs each hit's score, id and highlighted title.
 */
public class ElasticRestClientHelper {

    private static final Logger LOG = LoggerFactory.getLogger(ElasticRestClientHelper.class);

    /**
     * Executes {@code queryBuilder} against {@code indices} as a scroll search (5 minute keep-alive)
     * with highlighting on every field, and logs the took time, total hit count and each hit.
     *
     * <p>The client obtained from {@code ElasticRestClient} is always closed before returning,
     * whether the search succeeded or not.
     *
     * <p>NOTE(review): the request is opened with a scroll, and Elasticsearch 7.x is documented to
     * reject scroll requests whose {@code from} is greater than 0 — confirm callers only pass 0.
     *
     * <p>NOTE(review): if {@code ElasticRestClient.getInstance().getClient()} hands out a shared
     * client, closing it here would break later searches — confirm against that class.
     *
     * @param queryBuilder the query to run
     * @param from offset of the first hit to return
     * @param size maximum number of hits to return
     * @param indices names of the indices to search
     * @return the scroll id of the response, or {@code null} if the search failed with an
     *     {@link IOException} (the failure is logged)
     */
    public static String search(QueryBuilder queryBuilder, int from, int size, String... indices) {
        RestHighLevelClient restHighLevelClient = ElasticRestClient.getInstance().getClient();
        try {
            SearchRequest searchRequest = buildSearchRequest(queryBuilder, from, size, indices);
            SearchResponse searchResponse;
            try {
                searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
                // Without a response there is nothing to log or return; previously this path
                // fell through and failed with a NullPointerException.
                return null;
            }
            logResponse(searchResponse);
            return searchResponse.getScrollId();
        } finally {
            // Close in finally so the client is released even when the search throws.
            closeClient(restHighLevelClient);
        }
    }

    /** Builds the scroll search request with paging and red-span highlighting on all fields. */
    private static SearchRequest buildSearchRequest(
            QueryBuilder queryBuilder, int from, int size, String... indices) {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags("<span style=\"color:red\">");
        highlightBuilder.postTags("</span>");
        highlightBuilder.field("*");

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(queryBuilder);
        searchSourceBuilder.from(from).size(size);
        searchSourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(indices);
        searchRequest.source(searchSourceBuilder);
        searchRequest.scroll(TimeValue.timeValueMinutes(5));
        searchRequest.searchType(SearchType.QUERY_THEN_FETCH);
        return searchRequest;
    }

    /** Logs the took time, the total hit count and one line per hit (score, id, title). */
    private static void logResponse(SearchResponse searchResponse) {
        LOG.info("took {}", searchResponse.getTook().getSeconds());
        SearchHits searchHits = searchResponse.getHits();
        long totalHits = searchHits.getTotalHits().value;
        LOG.info("total hits {}", totalHits);
        for (SearchHit searchHit : searchHits.getHits()) {
            Map<String, Object> source = searchHit.getSourceAsMap();
            wrapperHighLight(source, searchHit.getHighlightFields());
            Object title = (source == null) ? null : source.get("title");
            LOG.info("{} {} {}", searchHit.getScore(), searchHit.getId(), title);
        }
    }

    /** Closes the client, logging (not propagating) any {@link IOException}. */
    private static void closeClient(RestHighLevelClient restHighLevelClient) {
        if (restHighLevelClient == null) {
            return;
        }
        try {
            restHighLevelClient.close();
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
        }
    }

    /**
     * Replaces, in place, the value of every {@code source} entry that has a highlight result
     * with the concatenation of that field's highlight fragments.
     *
     * <p>Entries whose highlight has no fragments (or only empty ones) keep their original value;
     * each entry is handled independently, so one field's highlight never leaks into another.
     *
     * @param source the hit's source map, modified in place; may be {@code null}
     * @param highLightFields highlight results keyed by field name; may be {@code null}
     */
    private static void wrapperHighLight(Map<String, Object> source, Map<String, HighlightField> highLightFields) {
        if (source == null || highLightFields == null || highLightFields.isEmpty()) {
            return;
        }
        for (Map.Entry<String, Object> entry : source.entrySet()) {
            HighlightField highlightField = highLightFields.get(entry.getKey());
            if (highlightField == null) {
                continue;
            }
            Text[] fragments = highlightField.getFragments();
            if (fragments == null || fragments.length == 0) {
                continue;
            }
            StringBuilder highLightText = new StringBuilder(100);
            for (Text fragment : fragments) {
                highLightText.append(fragment);
            }
            // Only overwrite when there is highlighted text to show.
            if (highLightText.length() > 0) {
                entry.setValue(highLightText.toString());
            }
        }
    }

    /** Match query: matches documents containing at least one of the analyzed query terms. */
    public static void search_01() {
        MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "周杰倫、羅志祥");
        search(matchQueryBuilder, 0, 10, "article");
    }

    /** Phrase query: matches documents containing every analyzed term, adjacent and in order. */
    public static void search_02() {
        MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰倫、羅志祥");
        search(matchPhraseQueryBuilder, 0, 10, "article");
    }

    /** Phrase query with slop 1: every term, in order, adjacent or separated by a gap of 1. */
    public static void search_03() {
        MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰倫、羅志祥");
        matchPhraseQueryBuilder.slop(1);
        search(matchPhraseQueryBuilder, 0, 10, "article");
    }

    /** Phrase query with slop 10: every term, in order, adjacent or separated by a gap within 10. */
    public static void search_04() {
        MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰倫、羅志祥");
        matchPhraseQueryBuilder.slop(10);
        search(matchPhraseQueryBuilder, 0, 10, "article");
    }

    /** Match query as the base (must), with a phrase query (should) to raise the relevance score. */
    public static void search_05() {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "周杰倫、羅志祥");
        boolQueryBuilder.must(matchQueryBuilder);
        MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰倫、羅志祥");
        matchPhraseQueryBuilder.slop(1);
        boolQueryBuilder.should(matchPhraseQueryBuilder);
        search(boolQueryBuilder, 0, 10, "article");
    }

    /**
     * Match query as the base (must), with boosted should clauses that control the relative
     * weight of each sub-query to raise the relevance score.
     */
    public static void search_06() {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery("title", "周杰倫、林俊杰、羅志祥");
        matchQueryBuilder1.minimumShouldMatch("30%");
        boolQueryBuilder.must(matchQueryBuilder1);
        MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery("title", "周杰倫");
        matchQueryBuilder2.boost(5.0f);
        boolQueryBuilder.should(matchQueryBuilder2);
        MatchQueryBuilder matchQueryBuilder3 = QueryBuilders.matchQuery("title", "林俊杰");
        matchQueryBuilder3.boost(3.0f);
        boolQueryBuilder.should(matchQueryBuilder3);
        search(boolQueryBuilder, 0, 10, "article");
    }

    /**
     * Match query as the base (must), with a should clause on the {@code title.shingle} sub-field
     * so that shingles (associated word groups) raise the relevance score.
     *
     * <p>Author's note: shingles are created at index time, and are more flexible and faster than
     * phrase queries; a suitable shingle size has to be chosen.
     */
    public static void search_07() {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery("title", "周杰倫、林俊杰、羅志祥");
        matchQueryBuilder1.minimumShouldMatch("30%");
        boolQueryBuilder.must(matchQueryBuilder1);
        MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery("title.shingle", "周杰倫、羅志祥");
        boolQueryBuilder.should(matchQueryBuilder2);
        search(boolQueryBuilder, 0, 10, "article");
    }
}