一般來說,ES的search接口最多返回1w條數據,當數據量很大又不方便修改參數的時候,可以使用scroll的方式獲取。
import requests
from easydict import EasyDict
from elasticsearch import Elasticsearch, helpers
# Host/port entries handed to the Elasticsearch client constructor below.
ES_SERVER = [
    {
        'host': '127.0.0.1',
        'port': 1234,
    },
]

# Module-level client instance; get_data() passes it to helpers.scan.
ES_CLIENT = Elasticsearch(hosts=ES_SERVER)
def get_data(data_id):
    """Approach 1: fetch all matching documents with the Python ES client.

    Runs a ``match`` query on the ``id`` field through ``helpers.scan``
    (scroll window ``5m``, request timeout ``1m``) against the ``index``
    index / doc type, and collects the ``_source`` of every hit.

    Args:
        data_id: Value matched against the ``id`` field.

    Returns:
        A list with the ``_source`` of each hit yielded by the scan.
    """
    search_body = {"query": {"match": {"id": data_id}}}
    hit_iter = helpers.scan(
        client=ES_CLIENT,
        query=search_body,
        index='index',
        doc_type='index',
        scroll=u'5m',
        timeout='1m',
    )

    sources = []
    for hit in hit_iter:
        sources.append(hit['_source'])
    return sources
def _es_api_request(post_data):
    """POST *post_data* to ``ES_SERVER_API`` and return the decoded reply.

    Args:
        post_data: Form fields sent to the HTTP endpoint.

    Returns:
        The JSON reply wrapped in an ``EasyDict`` for attribute access.

    Raises:
        EnvironmentError: If the request raises ``ValueError``, e.g. the
            reply body is not valid JSON.
    """
    try:
        payload = requests.post(ES_SERVER_API, data=post_data).json()
    except ValueError:
        raise EnvironmentError('ES 未能成功返回數據!')
    return EasyDict(payload)


def get_data_by_api(data_id):
    """Approach 2: fetch all matching documents through the ES HTTP API.

    Issues an initial ``index/_search?scroll=1m`` request with a ``match``
    query on ``id`` (page size 10000), then keeps requesting
    ``/_search/scroll`` with the returned ``_scroll_id`` until a page comes
    back with no hits.

    NOTE(review): ``ES_SERVER_API`` is not defined in the visible part of
    this file; it is assumed to be provided elsewhere -- confirm.

    Args:
        data_id: Value matched against the ``id`` field. It is sent as a
            JSON string, as the original query template did.

    Returns:
        A list with the ``_source`` of every hit across all pages.

    Raises:
        EnvironmentError: If any reply cannot be decoded as JSON.
    """
    import json  # local import so the block does not rely on file-level imports

    # json.dumps escapes quotes/backslashes that raw "%s" interpolation into
    # a JSON template would not.
    first_query = {
        "query": {"match": {"id": "%s" % (data_id,)}},
        "size": 10000,
    }
    post_data = {
        "password": "******",
        "method": "GET",
        "url": "index/_search?scroll=1m",
        "json": json.dumps(first_query),
    }

    rsp = _es_api_request(post_data)
    # Every later request goes to the scroll endpoint.
    post_data.update(url="/_search/scroll")

    result = []
    while True:
        page = rsp.hits.hits
        if not page:
            break
        result.extend(page)

        scroll_id = rsp.get('_scroll_id')
        if not scroll_id:
            # Without a scroll id there is no next page to ask for.
            break
        post_data.update(
            json=json.dumps({"scroll_id": scroll_id, "scroll": "1m"})
        )
        rsp = _es_api_request(post_data)

    return [hit['_source'] for hit in result]