ElasticSearch學習筆記-鄰近匹配搜索記錄

ElasticSearch版本:elasticsearch-7.3.0

環境準備:

# Create the `article` index on 192.168.0.1:9200.
# - settings: defines the custom `shingle_analyzer` (ik_smart tokenizer followed by
#   `shingle_filter`, which emits shingles of 2 to 4 tokens and no unigrams).
# - mappings: strict dynamic mapping; `id` is excluded from the stored _source;
#   `title` is analyzed with ik_smart and has two sub-fields, `title.raw` (keyword)
#   and `title.shingle` (analyzed with `shingle_analyzer`); `publish_time` is a date
#   in "yyyy-MM-dd HH:mm:ss" format.
# NOTE(review): ik_smart is presumably provided by the IK analysis plugin - confirm
# it is installed on the cluster before running this request.
curl -H "Content-Type: application/json" -XPUT 'http://192.168.0.1:9200/article/' -d '
{
  "settings": {
    "analysis": {
      "analyzer": {
        "shingle_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "shingle_filter"
          ]
        }
      },
      "filter": {
        "shingle_filter": {
          "type": "shingle",
          "min_shingle_size": 2,
          "max_shingle_size": 4,
          "output_unigrams": false
        }
      }
    }
  },
  "mappings": {
    "dynamic": "strict",
    "_source": {
      "excludes": [
        "id"
      ]
    },
    "properties": {
      "id": {
        "type": "keyword"
      },
      "title": {
        "analyzer": "ik_smart",
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword"
          },
          "shingle": {
            "type": "text",
            "analyzer": "shingle_analyzer"
          }
        }
      },
      "publish_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      }
    }
  }
}
'
# Index six sample documents (_doc/1 .. _doc/6) into `article`.
# Every title ends with the same phrase but lists a different subset/order of names,
# and publish_time decreases by one hour from document 1 to document 6.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/1' -d '
{
    "id": "1",
    "title": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏、蔡依林、孫燕姿、梁靜茹一同參加頒獎典禮",
    "publish_time": "2019-08-22 17:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/2' -d '
{
    "id": "2",
    "title": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏一同參加頒獎典禮",
    "publish_time": "2019-08-22 16:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/3' -d '
{
    "id": "3",
    "title": "周杰倫、林俊杰、羅志祥、孫燕姿一同參加頒獎典禮",
    "publish_time": "2019-08-22 15:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/4' -d '
{
    "id": "4",
    "title": "周杰倫、羅志祥、孫燕姿一同參加頒獎典禮",
    "publish_time": "2019-08-22 14:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/5' -d '
{
    "id": "5",
    "title": "周杰倫、林俊杰、梁靜茹一同參加頒獎典禮",
    "publish_time": "2019-08-22 13:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/6' -d '
{
    "id": "6",
    "title": "周杰倫、王力宏、潘瑋柏、林俊杰、羅志祥、孫燕姿一同參加頒獎典禮",
    "publish_time": "2019-08-22 12:48:16"
}
'
# Run the index's `shingle_analyzer` over the title of document 1 via the _analyze
# API (pretty-printed) to inspect the tokens that analyzer produces.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/article/_analyze?pretty=true' -d '
{
  "analyzer": "shingle_analyzer",
  "text": "周杰倫、林俊杰、羅志祥、王力宏、潘瑋柏、蔡依林、孫燕姿、梁靜茹一同參加頒獎典禮"
}
'

測試驗證代碼:

import java.io.IOException;
import java.util.Map;

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Demo helper that runs a series of proximity-matching queries against the
 * {@code article} index and logs score, id and highlighted title of every hit.
 *
 * <p>All methods are static; {@link #search(QueryBuilder, int, int, String...)} is the
 * shared executor and the {@code search_0x} methods each build one query variant.
 */
public class ElasticRestClientHelper {
	
	private static final Logger LOG = LoggerFactory.getLogger(ElasticRestClientHelper.class);
	
	/** Index queried by all {@code search_0x} demos. */
	private static final String ARTICLE_INDEX = "article";
	
	/** Name of the analyzed title field; also the source key that is logged per hit. */
	private static final String TITLE_FIELD = "title";
	
	/** Sub-field of the title that is analyzed with the shingle analyzer. */
	private static final String TITLE_SHINGLE_FIELD = "title.shingle";
	
	/**
	 * Executes {@code queryBuilder} against {@code indices} with highlighting on every
	 * field, logs the hits, and returns the scroll id of the response.
	 *
	 * <p>The request keeps a scroll context alive for 5 minutes and uses
	 * {@link SearchType#QUERY_THEN_FETCH}. Highlighted fragments are wrapped in
	 * {@code <span style="color:red">…</span>} and replace the matching source values
	 * before logging.
	 *
	 * <p>NOTE(review): the client obtained from {@code ElasticRestClient} is closed at the
	 * end of every call (now also on failure). If that client is a shared singleton,
	 * closing it here will break subsequent searches — confirm against
	 * {@code ElasticRestClient}.
	 *
	 * <p>NOTE(review): a scroll is always requested; Elasticsearch is believed to reject
	 * {@code from > 0} in a scroll context — verify before calling with a non-zero offset.
	 *
	 * @param queryBuilder query to execute
	 * @param from offset of the first hit
	 * @param size maximum number of hits to return
	 * @param indices indices to search
	 * @return the scroll id of the response, or {@code null} if the request failed with
	 *         an {@link IOException} (the error is logged)
	 */
	public static String search(QueryBuilder queryBuilder, int from, int size, String... indices) {
		RestHighLevelClient restHighLevelClient = ElasticRestClient.getInstance().getClient();
		try {
			SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
			searchSourceBuilder.query(queryBuilder);
			searchSourceBuilder.from(from).size(size);
			
			// Highlight every field ("*") so sub-field matches are reported too.
			HighlightBuilder highlightBuilder = new HighlightBuilder();
			highlightBuilder.preTags("<span style=\"color:red\">");
			highlightBuilder.postTags("</span>");
			highlightBuilder.field("*");
			searchSourceBuilder.highlighter(highlightBuilder);
			
			SearchRequest searchRequest = new SearchRequest(indices);
			searchRequest.source(searchSourceBuilder);
			searchRequest.scroll(TimeValue.timeValueMinutes(5));
			searchRequest.searchType(SearchType.QUERY_THEN_FETCH);
			
			SearchResponse searchResponse;
			try {
				searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
			} catch (IOException e) {
				// Without a response there is nothing to log or scroll; previously this
				// path fell through and failed with a NullPointerException.
				LOG.error(e.getMessage(), e);
				return null;
			}
			
			logHits(searchResponse);
			return searchResponse.getScrollId();
		} finally {
			try {
				restHighLevelClient.close();
			} catch (IOException e) {
				LOG.error(e.getMessage(), e);
			}
		}
	}
	
	/** Logs took-time, total hit count and one line (score, id, title) per returned hit. */
	private static void logHits(SearchResponse searchResponse) {
		LOG.info("took {}", searchResponse.getTook().getSeconds());
		SearchHits searchHits = searchResponse.getHits();
		long totalHits = searchHits.getTotalHits().value;
		LOG.info("total hits {}", totalHits);
		for (SearchHit searchHit : searchHits.getHits()) {
			Map<String, Object> source = searchHit.getSourceAsMap();
			wrapperHighLight(source, searchHit.getHighlightFields());
			Object title = source == null ? null : source.get(TITLE_FIELD);
			LOG.info("{} {} {}", searchHit.getScore(), searchHit.getId(), title);
		}
	}
	
	/**
	 * Replaces source values with their highlighted text, in place.
	 *
	 * <p>For every source key that also has a highlight entry, the fragments are
	 * concatenated and stored as the new value. Keys without a highlight entry, or whose
	 * fragments are missing or empty, keep their original value.
	 *
	 * @param source source map of a hit; modified in place, may be {@code null}
	 * @param highLightFields highlight fields of the same hit, keyed by field name
	 */
	private static void wrapperHighLight(Map<String, Object> source, Map<String, HighlightField> highLightFields) {
		if (source == null || highLightFields == null || highLightFields.isEmpty()) {
			return;
		}
		for (Map.Entry<String, Object> entry : source.entrySet()) {
			HighlightField highlightField = highLightFields.get(entry.getKey());
			if (highlightField == null) {
				continue;
			}
			Text[] fragments = highlightField.getFragments();
			if (fragments == null) {
				continue;
			}
			StringBuilder highLightText = new StringBuilder(100);
			for (Text fragment : fragments) {
				highLightText.append(fragment);
			}
			// Only overwrite when there is highlighted text; never clobber the original
			// value with null or with a previous field's highlight.
			if (highLightText.length() > 0) {
				entry.setValue(highLightText.toString());
			}
		}
	}
	
	/** Match query: matches documents containing at least one of the analyzed terms. */
	public static void search_01() {
		MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(TITLE_FIELD, "周杰倫、羅志祥");
		search(matchQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/** Phrase query: every analyzed term must match, adjacent and in order. */
	public static void search_02() {
		MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery(TITLE_FIELD, "周杰倫、羅志祥");
		search(matchPhraseQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/** Phrase query with slop 1: every term must match in order, adjacent or one position apart. */
	public static void search_03() {
		MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery(TITLE_FIELD, "周杰倫、羅志祥");
		matchPhraseQueryBuilder.slop(1);
		search(matchPhraseQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/** Phrase query with slop 10: every term must match in order, adjacent or within 10 positions. */
	public static void search_04() {
		MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery(TITLE_FIELD, "周杰倫、羅志祥");
		matchPhraseQueryBuilder.slop(10);
		search(matchPhraseQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/** Match query as the required clause, plus an optional phrase query (slop 1) to raise the relevance score. */
	public static void search_05() {
		BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
		MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(TITLE_FIELD, "周杰倫、羅志祥");
		boolQueryBuilder.must(matchQueryBuilder);
		MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery(TITLE_FIELD, "周杰倫、羅志祥");
		matchPhraseQueryBuilder.slop(1);
		boolQueryBuilder.should(matchPhraseQueryBuilder);
		search(boolQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/**
	 * Match query as the required clause (minimum_should_match 30%), plus optional match
	 * clauses whose boost values (5.0 and 3.0) control their relative weight in the score.
	 */
	public static void search_06() {
		BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
		MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery(TITLE_FIELD, "周杰倫、林俊杰、羅志祥");
		matchQueryBuilder1.minimumShouldMatch("30%");
		boolQueryBuilder.must(matchQueryBuilder1);
		MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery(TITLE_FIELD, "周杰倫");
		matchQueryBuilder2.boost(5.0f);
		boolQueryBuilder.should(matchQueryBuilder2);
		MatchQueryBuilder matchQueryBuilder3 = QueryBuilders.matchQuery(TITLE_FIELD, "林俊杰");
		matchQueryBuilder3.boost(3.0f);
		boolQueryBuilder.should(matchQueryBuilder3);
		search(boolQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
	/**
	 * Match query as the required clause (minimum_should_match 30%), plus an optional
	 * match on the shingle sub-field to raise the relevance score of documents that
	 * contain the terms as neighbours.
	 *
	 * <p>Author's note: shingles are built at index time, which is more flexible and
	 * faster than phrase queries, but a suitable shingle size has to be chosen.
	 */
	public static void search_07() {
		BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
		MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery(TITLE_FIELD, "周杰倫、林俊杰、羅志祥");
		matchQueryBuilder1.minimumShouldMatch("30%");
		boolQueryBuilder.must(matchQueryBuilder1);
		MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery(TITLE_SHINGLE_FIELD, "周杰倫、羅志祥");
		boolQueryBuilder.should(matchQueryBuilder2);
		search(boolQueryBuilder, 0, 10, ARTICLE_INDEX);
	}
	
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章