ElasticSearch-7.3.0 進階語法
字段類型
# Text:被分析索引的字符串類型
# Keyword:不能被分析只能被精確匹配的字符串類型
# Date:日期類型,可以配置 format 一起使用({"type": "date", "format": "yyyy-MM-dd"})
# 數字類型:long,integer,short,double 等
# boolean 類型:true,false
# Array:數組類型 ["one", "two"]
# Object:json 嵌套({"property1": "value1", "property2": "value2"})
# Ip類型:127.0.0.1
# Geo_point:地理位置
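地址的定義(注意:ES 7.x 已移除 mapping type,默認情況下 mappings 下面不能再包含 _doc 這一層,需要直接寫 properties,或者在請求中加上 include_type_name=true 參數):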
{
"mappings": {
"_doc": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}
建立索引的方式:
"location": {
"lat": 41.12,
"lon": -71.34
}
高級查詢語法
analyze
分析過程
# 使用 analyze api 查看分詞狀態
GET /movie/_analyze
{
"field": "name",
"text": "Eating an apple a day & keeps the doctor awawy"
}
# 使用結構化的方式重新創建索引(指定分詞器)
PUT /movie
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "english"
}
}
}
}
Tmdb
實例
數據下載
在網上直接搜索 Kaggle TMDB
即可下載相對應的數據文件
索引建立
# 建立 movie 索引
PUT /movie
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "english"
},
"tagline": {
"type": "text",
"analyzer": "english"
},
"release_date": {
"type": "date",
"format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"
},
"popularity": {
"type": "double"
},
"overview": {
"type": "text",
"analyzer": "english"
},
"cast": {
"type": "object",
"properties": {
"character": {
"type": "text",
"analyzer": "standard"
},
"name": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
}
match
和term
# match 查詢
GET /movie/_search
{
"query": {
"match": {
"title": "steve zissou"
}
}
}
# term 查詢
GET /movie/_search
{
"query": {
"term": {
"title": {
"value": "steve zissou"
}
}
}
}
# match 查詢會根據字段所指定的分詞器對查詢文本進行分詞,而 term 查詢不會對查詢文本進行分詞。對於上面兩個示例,title 指定的是 english 分詞器,所以 match 查詢中的 steve zissou 會被解析成 steve 和 zissou 兩個關鍵詞,默認按 or 邏輯匹配,只要 title 中含有其中任意一個關鍵詞就會被命中;而 term 查詢會把 steve zissou 當作一個完整的詞項去倒排索引中精確查找,由於 title 在索引時已被分詞成 steve、zissou 等單獨的詞項,索引中並不存在 steve zissou 這個詞項,所以該 term 查詢通常無法命中任何文檔。
分詞後的and
和or
# 分詞後的 or 的邏輯
GET /movie/_search
{
"query": {
"match": {
"title": "basketball with cartoom aliens"
}
}
}
# 分詞後的 and 的邏輯
GET /movie/_search
{
"query": {
"match": {
"title": {
"query": "basketball with cartoom aliens",
"operator": "and"
}
}
}
}
最小詞匹配項
# 最小詞匹配項
GET /movie/_search
{
"query": {
"match": {
"title": {
"query": "basketball love aliens",
"operator": "or",
"minimum_should_match": 2
}
}
}
}
短語查詢
# 短語查詢
GET /movie/_search
{
"query": {
"match_phrase": {
"title": "steve zissou"
}
}
}
score
打分
# 查看 score
GET /movie/_search
{
"explain": true,
"query": {
"match": {
"title": "steve"
}
}
}
======================================================
"details" : [
{
# 2.2 * 7.1592917 * 0.47008154 = 7.403992
"value" : 7.403992,
"description" : "score(freq=1.0), product of:",
"details" : [
{
"value" : 2.2,
# 可以手動指定 boost 放大係數;這裏顯示的 2.2 = 默認 boost(1.0)× (k1 + 1),其中 k1 = 1.2,手動指定 boost 後該值會按比例放大
"description" : "boost",
"details" : [ ]
},
{
"value" : 7.1592917,
# 逆文檔頻率:隨着 n 的增加,整個 idf 是減少的
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
# 該字段中包含搜索詞的文檔一共有 3 篇
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 4500,
# 含有該字段的文檔總個數爲 4500
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.47008154,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
# 搜索關鍵詞在文檔字段中出現的次數
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 2.0,
# 文檔字段的長度
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 2.1757777,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
多字段查詢
# 多字段查詢:在多字段查詢時,會對兩個字段都進行打分,最後的打分結果取的是最大的那個分值
GET /movie/_search
{
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title", "overview"]
}
}
}
# 優化多字段查詢:讓 title 字段佔比加大
GET /movie/_search
{
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title^10", "overview"]
}
}
}
# 優化多字段查詢
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title^10", "overview"],
"tie_breaker": 0.3
}
}
}
# bool 查詢
# must:必須都爲 true
# must_not:必須都是 false(即所有子查詢都不能匹配)
# should:其中只要有一個爲 true,即可
# 爲 true 的越多則得分越高
GET /movie/_search
{
"explain": true,
"query": {
"bool": {
"should": [
{
"match": {
"title": "basketball with cartoom aliens"
}
},
{
"match": {
"overview": "basketball with cartoom aliens"
}
}
]
}
}
}
# 不同的 multi_match 查詢其實是有不同的 type,type 不同則打分方式不同
# best_fields:默認的得分方式,取得最高的分數作爲對應文檔的得分,“最匹配模式” -> dis_max
GET /movie/_search
{
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title", "overview"],
"type": "best_fields"
}
}
}
# dis_max
GET /movie/_search
{
"explain": true,
"query": {
"dis_max": {
"queries": [
{
"match": {
"title": "basketball with cartoom aliens"
}
},
{
"match": {
"overview": "basketball with cartoom aliens"
}
}
]
}
}
}
# 查看打分規則:dis_max
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title^10", "overview"],
"type": "best_fields"
}
}
}
# most_fields:考慮絕大多數(所有的),文檔的字段得分相加獲得我們想要的結果
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title", "overview"],
"type": "most_fields"
}
}
}
# 權重的調整是針對於 boost進行調整
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "basketball with cartoom aliens",
"fields": ["title^10", "overview^0.1"],
"type": "most_fields"
}
}
}
# cross_fields:以分詞爲單位計算欄位的總分,適用於詞導向的匹配模式
GET /movie/_search
{
"explain": true,
"query": {
"multi_match": {
"query": "steve jobs",
"fields": ["title", "overview"],
"type": "cross_fields"
}
}
}
GET /movie/_validate/query?explain
{
"query": {
"multi_match": {
"query": "steve jobs",
"fields": ["title", "overview"],
"type": "cross_fields"
}
}
}
# query string
# 方便的利用 AND OR NOT
GET /movie/_search
{
"query": {
"query_string": {
"fields": ["title"],
"query": "steve AND jobs"
}
}
}
過濾與排序
# filter 過濾查詢
# 單條件過濾
GET /movie/_search
{
"query": {
"bool": {
"filter": {
"term": {
"title": "steve"
}
}
}
}
}
# 多條件過濾
GET /movie/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"title": "steve"
}
},
{
"term": {
"cast.name": "gaspard"
}
}
]
}
}
}
# 多條件過濾
GET /movie/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"title": "steve"
}
},
{
"term": {
"cast.name": "gaspard"
}
},
{
"range": {
"release_date": {
"lte": "2015/01/01"
}
}
},
{
"range": {
"popularity": {
"gte": 25
}
}
}
]
}
}
}
# 多條件過濾並排序
GET /movie/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"title": "steve"
}
},
{
"term": {
"cast.name": "gaspard"
}
},
{
"range": {
"release_date": {
"lte": "2015/01/01"
}
}
},
{
"range": {
"popularity": {
"gte": 25
}
}
}
]
}
},
"sort": [
{
"popularity": {
"order": "desc"
}
}
]
}
# 帶 match 打分的 filter,should 控制打分,filter 控制過濾
GET /movie/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"title": "life"
}
}
],
"filter": [
{
"term": {
"title": "steve"
}
},
{
"term": {
"cast.name": "gaspard"
}
},
{
"range": {
"release_date": {
"lte": "2015/01/01"
}
}
},
{
"range": {
"popularity": {
"gte": 25
}
}
}
]
}
}
}
查全率查準率
查全率:正確的結果有 n 個,查詢出來正確的有 m 個,所以查全率就是 m / n
查準率:查出的 n 個文檔有 m 個文檔是正確的,所以查準率就是 m / n
兩者不可兼得,但是可以調整順序
通常可以優先追求高的查全率:雖然查全率提高往往會導致查準率降低,但只要通過打分排序保證真正正確的那 m 個結果排在前面,就既可以保證用戶體驗,又可以保證查全率。
自定義score
# function-score
GET /movie/_search
{
"explain": true,
"query": {
"function_score": {
# 原始查詢得到的 oldScore
"query": {
"multi_match": {
"query": "steve job",
"fields": [
"title",
"overview"
],
"operator": "or",
"type": "most_fields"
}
},
"functions": [
{
"field_value_factor": {
# 對應要調整處理的字段
"field": "popularity",
"modifier": "log2p",
"factor": 10
}
}
]
}
}
}
# function-score
GET /movie/_search
{
"explain": true,
"query": {
"function_score": {
# 原始查詢得到的 oldScore
"query": {
"multi_match": {
"query": "steve job",
"fields": [
"title",
"overview"
],
"operator": "or",
"type": "most_fields"
}
},
"functions": [
{
"field_value_factor": {
# 對應要調整處理的字段
"field": "popularity",
"modifier": "log2p",
"factor": 10
}
},
{
"field_value_factor": {
"field": "popularity",
"modifier": "log2p",
"factor": 5
}
}
],
# score_mode:functions 中各個函數計算出的得分之間相加
"score_mode": "sum",
# boost_mode:函數得分最後再與原始查詢的 oldScore 相加
"boost_mode": "sum"
}
}
}