聊一聊Elasticsearch和MySQL的常用操作

前言

Elasticsearch 是開源的搜索和數據分析引擎,也是名列前茅的 NoSQL 數據庫。

很多時候會想拿它和關係型數據庫做對比,嚴格來說它們是沒有太多可比性的。

不過把有的東西放在一起比較,會幫助我們快速去理解一些 ElasticSearch 的內容。

老黃這邊抽空梳理了一些常用的 sql 語句 “對應” ElasticSearch 的操作,主要是針對 CRUD 的。

示例用的是 mysql 和 ElasticSearch 7.12.0 。

表操作

爲了簡單起見,弄一個簡單的訂單表 order-2021 來演示。

刪除表

drop table `order-2021`
DELETE http://localhost:9200/order-2021

創建表

create table `order-2021` ( 
`id` bigint(20) NOT NULL AUTO_INCREMENT, 
`order_id` varchar(32) NOT NULL, 
`cus_name` varchar(20) NOT NULL, 
`item_name` varchar(64) NOT NULL, 
`number` int NOT NULL, 
`create_time` bigint(20) NOT NULL, 
`update_time` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_id` (`order_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
PUT http://localhost:9200/order-2021
Content-Type: application/json

{
    "settings":{
        "number_of_shards": 1
    },
    "mappings":{
        "properties":{
            "order_id" :{
                "type":"keyword"
            },
            "cus_name" :{
                "type":"keyword"
            },
            "item_name" :{
                "type":"text",
                "fields": {
                    "keyword": { 
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            },
            "number":{
                "type":"integer"
            },
            "create_time" :{
                "type":"long"
            },
            "update_time" :{
                "type":"long"
            }
        }
    }
}

NOTE:

  1. mysql 的索引需要一個一個的加,ES 這邊不需要。
  2. mysql 有自增 Id, ES 這邊也會有自動生成 Id 的操作,同樣的,兩邊也可以自定義 Id。
  3. keyword 和 text 按需調整。

改表名

rename table `order-2021` to `order`;
POST http://localhost:9200/_aliases
Content-Type: application/json

{
  "actions" : [
    { "add" : { "index" : "order-2021", "alias" : "order" } }
  ]
}

NOTE: ES 不支持直接重命名索引,這裏是給索引起一個別名,然後兩個名字都可以用。

插入數據

單條插入

insert into `order-2021` 
(order_id, cus_name, item_name, number, create_time, update_time) 
values 
('11', 'catcherwong', 'phone', 1, 1619877257000, 0)
POST http://localhost:9200/order-2021/_doc/
Content-Type: application/json

{ 
    "order_id" : "11", 
    "cus_name":"catcherwong", 
    "item_name":"phone",
    "number": 1,
    "create_time": 1619877257000, 
    "update_time": 0 
}

批量插入

insert into `order-2021` (order_id, cus_name, item_name, number, create_time, update_time) 
values 
('12', 'catcherwong', 'phone', 1, 1619877257000, 0),
('13', 'catcherwong', 'item-1', 2, 1619977257000, 0),
('14', 'catcherwong', 'item-2', 3, 1614877257000, 0);
POST http://localhost:9200/_bulk
Content-Type: application/x-ndjson

{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "12", "cus_name":"catcherwong", "item_name":"phone", "number": 1, "create_time": 1619877257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "13", "cus_name":"catcherwong", "item_name":"item-1", "number": 2, "create_time": 1619977257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "14", "cus_name":"catcherwong", "item_name":"item-2", "number": 3, "create_time": 1614877257000, "update_time": 0 }

NOTE:

  1. ES 的批量操作需要注意,每一行(包括最後一行)都要以換行符 \n 結尾
  2. Content-Type 指定爲 application/x-ndjson

更新數據

根據ID更新

update `order-2021` 
set update_time = '1619877307000', cus_name = 'catcherwong-1' 
where id = '6wvox3kB4OeD0spWtstW'
POST http://localhost:9200/order-2021/_update/6wvox3kB4OeD0spWtstW
Content-Type: application/json

{
    "doc":{
        "update_time" : 1619877307000,
        "cus_name": "catcherwong-1"
    }    
}

根據查詢條件更新

update `order-2021` 
set update_time = '1619877307000', cus_name = 'catcherwong-1' 
where order_id = '11'
POST http://localhost:9200/order-2021/_update_by_query
Content-Type: application/json

{
    "script":{
        "source":"ctx._source['cus_name']=params.cus_name;ctx._source['update_time']=params.update_time;",
        "params":{
            "cus_name":"catcherwong-1",
            "update_time": 1619877307000
        }
    },
    "query":{
        "term":{
            "order_id":"11"
        }
    }
}

NOTE: ES 的條件更新分爲兩部分,一個是 query ,也就是 where 部分, 一個是 script ,也就是 set 部分。

刪除數據

根據Id刪除

delete from `order-2021` 
where id = 'c8cb33kBoze4GtqD9rTs'
DELETE http://localhost:9200/order-2021/_doc/c8cb33kBoze4GtqD9rTs

根據查詢條件刪除

delete from `order-2021` 
where order_id = '11'
POST http://localhost:9200/order-2021/_delete_by_query
Content-Type: application/json

{
  "query": {
    "term": {
      "order_id": "11"
    }
  }
}

查詢數據

查詢全部

select * from `order-2021`
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
  "query": {    
    "match_all": {}
  }
}

條件查詢

select * from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE:

  1. ES 的條件查詢是有非常多的, 這裏只列舉了部分。
  2. ES 的查詢在默認情況下會有打分的操作,是會損耗性能的,而常規的 sql 查詢是不需要這些的,所以用 bool + filter 來跳過打分。

查詢指定字段

select cus_name, order_id 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "_source":[ "cus_name", "order_id"],
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: 查詢的時候,如果只要幾個字段,那麼可以通過 _source 來指定。

查詢數量

select count(*) 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_count
Content-Type: application/json

{
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: 根據指定條件獲取數量,建議用 _count 來查詢;_search 結果裏的 hits.total 默認最多只精確統計到 10000 條(可通過 track_total_hits 調整),所以條數不一定準確。

淺分頁

select cus_name, order_id 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
order by create_time desc 
limit 0,10
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "_source":[ "cus_name", "order_id"],
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    },
    "size":10,
    "from":0,
    "sort":[
        { "create_time":{ "order":"desc"} }
    ]
}

NOTE: 淺分頁(from + size)頁碼不宜過深,默認 from + size 不能超過 10000(index.max_result_window),適合滾動加載的場景;深度分頁可以考慮 search_after。

Group By

select number, count(*) as number_count 
from `order-2021` 
where create_time >= 0 
and create_time <= 1622555657322
group by number 
order by number asc
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "size":0,
    "aggs": {
        "number_count": {
            "terms": {
                "field": "number",
                "order" : { "_key" : "asc" }
            }
        }
    },
    "query":{
        "bool":{
            "filter":[
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: group by 屬於聚合操作的一種,要用 aggs;聚合不需要返回原始文檔,所以 size 設爲 0。另外 terms 聚合默認只返回前 10 個分組,可以通過聚合內的 size 參數調整。

Avg/Min/Max/Sum

select avg(number) as number_avg, 
min(number) as number_min, 
max(number) as number_max, 
sum(number) as number_sum 
from `order` 
where create_time >= 0 
and create_time <= 1622555657322 
GET http://localhost:9200/order/_search
Content-Type: application/json

{
    "size":0,
    "query":{
        "bool":{
            "filter":[
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    },
    "aggs": {
        "number_avg": {
            "avg": {
                "field": "number"
            }
        },
        "number_min": {
            "min": {
                "field": "number"
            }
        },
        "number_max": {
            "max": {
                "field": "number"
            }
        },
        "number_sum": {
            "sum": {
                "field": "number"
            }
        }
    }
}

寫在最後

Elasticsearch 這一塊常用的 CRUD 操作,和常用的關係型數據庫對照一遍,其實很容易上手。

要用到一些 Elasticsearch 特有的功能時,去查詢官方文檔也基本可以搞定。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章