#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2019/11/19
@Author : LXW
@Site :
@File : elasticSearch_utils.py
@Software: PyCharm
@Description: 主要用於對es的查詢操作(大量數據查詢)
"""
from elasticsearch import Elasticsearch
class ElasticSearchUtils:
    """Small helper around an Elasticsearch client for fetching large result sets."""

    def __init__(self, host):
        """Create the underlying client.

        :param host: value forwarded unchanged as ``hosts`` to ``Elasticsearch``
        """
        self.cli = Elasticsearch(hosts=host)

    def search_by_scroll_id(self, index=None, doc_type=None, size=1000, agg=None):
        """Fetch every matching document by paging through a scroll cursor.

        The scroll keep-alive is two minutes per request.
        Note from the original author: Elasticsearch below 5.x uses
        ``search_type='scan'``; 5.x and above use ``sort='_doc'`` (used here).

        :param index: index name (or pattern) to search
        :param doc_type: document type; only sent to the client when not None
        :param size: number of documents requested per page
        :param agg: request body (query / aggregation) passed as ``body``
        :return: list with the ``_source`` of every hit, in the order returned
        """
        search_kwargs = {
            "index": index,
            "scroll": "2m",
            "sort": "_doc",
            "size": size,
            "body": agg,
        }
        # Only forward doc_type when the caller actually supplied one, so the
        # call also works with clients that no longer accept that parameter.
        if doc_type is not None:
            search_kwargs["doc_type"] = doc_type

        all_data = []
        scroll_id = None
        try:
            page = self.cli.search(**search_kwargs)
            while True:
                # Always keep the most recent scroll id; it may change between pages.
                scroll_id = page["_scroll_id"]
                hits = page["hits"]["hits"]
                # An empty page marks the end of the result set. The loop is
                # driven by the page's hit count rather than hits.total, which
                # is an object (not an int) on Elasticsearch 7+.
                if not hits:
                    break
                all_data.extend(hit["_source"] for hit in hits)
                page = self.cli.scroll(scroll_id=scroll_id, scroll="2m")
        finally:
            # Release the server-side scroll context instead of waiting for
            # the keep-alive to expire.
            if scroll_id is not None:
                self.cli.clear_scroll(scroll_id=scroll_id)
        return all_data
if __name__ == '__main__':
    # Demo: fetch every document in a fixed @timestamp window (epoch millis)
    # and print the first one.
    time_window = {
        "range": {
            "@timestamp": {
                "gte": 1574149294213,
                "lte": 1574150194214,
                "format": "epoch_millis",
            }
        }
    }
    body = {
        "size": 1000,
        "query": {
            "bool": {
                "must": [{"match_all": {}}, time_window],
                "must_not": [],
            }
        },
    }
    # The host argument is handed straight to the client as ``hosts``; a list
    # of "host:port" strings can be given instead of a single address.
    client = ElasticSearchUtils("127.0.0.14:9200")
    data = client.search_by_scroll_id(index="test-*", doc_type="test", size=1000, agg=body)
    # Only the first returned document is printed.
    if data:
        print(data[0])
遊標查詢elasticsearch數據
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.