前言
ElasticSearch,開源的搜索和數據分析引擎,也是名列前茅的 nosql。
很多時候會想拿它和關係型數據庫做對比,嚴格上它們是沒有太多可比性的。
不過把有的東西放在一起比較,會幫助我們快速去理解一些 ElasticSearch 的內容。
老黃這邊抽空梳理了一些常用的 sql 語句 “對應” ElasticSearch 的操作,主要是針對 CURD 的。
示例用的是 mysql 和 ElasticSearch 7.12.0 。
表操作
爲了簡單起見,弄一個簡單的訂單表 `order-2021` 來演示。
刪除表
drop table `order-2021`
DELETE http://localhost:9200/order-2021
創建表
create table `order-2021` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`order_id` varchar(32) NOT NULL,
`cus_name` varchar(20) NOT NULL,
`item_name` varchar(64) NOT NULL,
`number` int NOT NULL,
`create_time` bigint(20) NOT NULL,
`update_time` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_id` (`order_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
PUT http://localhost:9200/order-2021
Content-Type: application/json
{
"settings":{
"number_of_shards": 1
},
"mappings":{
"properties":{
"order_id" :{
"type":"keyword"
},
"cus_name" :{
"type":"keyword"
},
"item_name" :{
"type":"text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"number":{
"type":"integer"
},
"create_time" :{
"type":"long"
},
"update_time" :{
"type":"long"
}
}
}
}
NOTE:
- mysql 的索引需要一個一個的加,ES 這邊不需要。
- mysql 有自增 Id, ES 這邊也會有自動生成 Id 的操作,同樣的,兩邊也可以自定義 Id。
- keyword 和 text 按需調整。
改表名
rename table `order-2021` to `order`;
POST http://localhost:9200/_aliases
Content-Type: application/json
{
"actions" : [
{ "add" : { "index" : "order-2021", "alias" : "order" } }
]
}
NOTE: ES 的是起一個別名,然後兩個名字都可以用。
插入數據
單條插入
insert into `order-2021`
(order_id, cus_name, item_name, number, create_time, update_time)
values
('11', 'catcherwong', 'phone', 1, 1619877257000, 0)
POST http://localhost:9200/order-2021/_doc/
Content-Type: application/json
{
"order_id" : "11",
"cus_name":"catcherwong",
"item_name":"phone",
"number": 1,
"create_time": 1619877257000,
"update_time": 0
}
批量插入
insert into `order-2021` (order_id, cus_name, item_name, number, create_time, update_time)
values
('12', 'catcherwong', 'phone', 1, 1619877257000, 0),
('13', 'catcherwong', 'item-1', 2, 1619977257000, 0),
('14', 'catcherwong', 'item-2', 3, 1614877257000, 0);
POST http://localhost:9200/_bulk
Content-Type: application/x-ndjson
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "12", "cus_name":"catcherwong", "item_name":"phone", "number": 1, "create_time": 1619877257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "13", "cus_name":"catcherwong", "item_name":"item-1", "number": 2, "create_time": 1619977257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "14", "cus_name":"catcherwong", "item_name":"item-2", "number": 3, "create_time": 1614877257000, "update_time": 0 }
NOTE:
- ES 的批量操作需要注意,每一行(包括最後一行)都要以換行符 `\n` 結尾
- Content-Type 指定爲 application/x-ndjson
更新數據
根據ID更新
-- Update a single row, addressed by its primary key.
-- update_time is a bigint column, so it is assigned a numeric literal.
-- NOTE(review): the id value mirrors the Elasticsearch auto-generated document id
-- used in the request that follows; the table definition declares `id` as bigint,
-- so a real MySQL statement would use a numeric id here.
update `order-2021`
set update_time = 1619877307000, cus_name = 'catcherwong-1'
where id = '6wvox3kB4OeD0spWtstW'
POST http://localhost:9200/order-2021/_update/6wvox3kB4OeD0spWtstW
Content-Type: application/json
{
"doc":{
"update_time" : 1619877307000,
"cus_name": "catcherwong-1"
}
}
根據查詢條件更新
-- Update every row that matches a non-key condition (here: one order_id).
-- update_time is a bigint column, so it is assigned a numeric literal.
update `order-2021`
set update_time = 1619877307000, cus_name = 'catcherwong-1'
where order_id = '11'
POST http://localhost:9200/order-2021/_update_by_query
Content-Type: application/json
{
"script":{
"source":"ctx._source['cus_name']=params.cus_name;ctx._source['update_time']=params.update_time;",
"params":{
"cus_name":"catcherwong-1",
"update_time": 1619877307000
}
},
"query":{
"term":{
"order_id":"11"
}
}
}
NOTE: ES 的條件更新分爲兩部分,一個是 query ,也就是 where 部分, 一個是 script ,也就是 set 部分。
刪除數據
根據Id刪除
-- Delete a single row, addressed by its primary key.
-- NOTE(review): the id value mirrors the Elasticsearch auto-generated document id
-- used in the request that follows; the table definition declares `id` as bigint,
-- so a real MySQL statement would use a numeric id here.
delete from `order-2021`
where id = 'c8cb33kBoze4GtqD9rTs'
DELETE http://localhost:9200/order-2021/_doc/c8cb33kBoze4GtqD9rTs
根據查詢條件刪除
delete from `order-2021`
where order_id = '11'
POST http://localhost:9200/order-2021/_delete_by_query
Content-Type: application/json
{
"query": {
"term": {
"order_id": "11"
}
}
}
查詢數據
查詢全部
select * from `order-2021`
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"query": {
"match_all": {}
}
}
條件查詢
select * from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order/_search
Content-Type: application/json
{
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE:
- ES 的條件查詢是有非常多的, 這裏只列舉了部分。
- ES 的查詢,在默認情況下會有打分的操作,是會損耗性能的,而常規的 sql 查詢時不需要這些,所以用 bool + filter 來忽略。
查詢指定字段
select cus_name, order_id
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"_source":[ "cus_name", "order_id"],
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: 查詢的時候,如果只要幾個字段,那麼可以通過 `_source` 來指定。
查詢數量
select count(*)
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_count
Content-Type: application/json
{
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: 根據指定條件獲取數量,建議用 `_count` 來查詢;`_search` 查詢的結果裏面的條數不一定準確(ES 7.x 默認最多只精確統計到 10000 條,超過時 hits.total.relation 會是 gte,除非把 track_total_hits 設爲 true)。
淺分頁
select cus_name, order_id
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
order by create_time desc
limit 0,10
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"_source":[ "cus_name", "order_id"],
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
},
"size":10,
"from":0,
"sort":[
{ "create_time":{ "order":"desc"} }
]
}
NOTE: 淺分頁(from + size)的頁碼不宜過深,默認 from + size 不能超過 index.max_result_window(10000);滾動加載、深度分頁的場景可以考慮 search_after。
Group By
select number, count(*) as number_count
from `order-2021`
where create_time >= 0
and create_time <= 1622555657322
group by number
order by number asc
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"size":0,
"aggs": {
"number_count": {
"terms": {
"field": "number",
"order" : { "_key" : "asc" }
}
}
},
"query":{
"bool":{
"filter":[
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: group by 屬於聚合操作的一種,要用 aggs(聚合)來做;這裏只需要聚合結果,不需要返回原始文檔,所以 size 設爲 0。
Avg/Min/Max/Sum
-- Compute avg/min/max/sum of `number` over the rows whose create_time
-- falls inside the closed range [0, 1622555657322].
select avg(number) as number_avg,
min(number) as number_min,
max(number) as number_max,
sum(number) as number_sum
from `order-2021`
where create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order/_search
Content-Type: application/json
{
"size":0,
"query":{
"bool":{
"filter":[
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
},
"aggs": {
"number_avg": {
"avg": {
"field": "number"
}
},
"number_min": {
"min": {
"field": "number"
}
},
"number_max": {
"max": {
"field": "number"
}
},
"number_sum": {
"sum": {
"field": "number"
}
}
}
}
寫在最後
Elasticsearch 這一塊常用的 CURD 操作,和常用的關係型數據庫對照一遍,其實很容易上手。
要用到一些 Elasticsearch 特有的功能時,去查詢官方文檔也基本可以搞定。