先贴一个Query DSL的官方文档:https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
我平时喜欢查看官方文档,了解数据查询和存储方面的性能优化点,下面是积累的脚本分享。
- 查询语句格式
GET /索引名/_search
{
"query": {
"查询类型": {
"查询条件": "查询条件值"
}
}
}
- 查询类型:match_all,match,term,range,fuzzy,bool 等等
- 查询条件:查询条件会根据类型的不同,写法也有差异
- 分词匹配 match
match进行搜索的时候,会先分词再匹配。通过operator指定多个分词之间的逻辑关系
{
"query": {
"match": {
"nickname": {
"query": "大山",
"operator": "and" // 默认值为or
}
}
}
}
// 下面查询是上面查询的简写
{
"query": {
"match": {
"nickname": "大山"
}
}
}
- 短语匹配match_phrase
匹配含有短语中所有单词的文档,且单词之间没有插入别的单词。等价于match中指定"type": "phrase"(该写法仅旧版本ES支持)。slop值可以指定临近的距离。
{
"query": {
"match_phrase": {
"nickname": {
"query": "大山"
}
}
}
}
- 单项目匹配term
term查询不会对提供的查询值分词,而ES在写入文档时默认用standard分词器对指定field的值做了分词(英文还会被转成小写),因此使用standard分词时,term检索的值必须是不可再拆分的词项才能匹配出结果。例如,对于汉字,只支持单个汉字的匹配;对于英文单词,只支持单个单词的匹配,也就是中间不能有空格。因此term适合对数字、boolean、date以及数字字母字符串做精确匹配。
{
"query": {
"term": {
"author_id": "101572313821"
}
},
"sort": [
{
"crawl_time": {
"order": "desc"
}
}
],
"size": 30
}
- 单项目多值匹配terms
// 至少一个值在field中存在,相当于SQL中的IN(standard分词会把英文转成小写,所以查询值要写小写)
{
"query": {
"terms": {
"title": ["love", "china"]
}
}
}
// 多个值都必须在field中存在
{
"query": {
"bool": {
"must": [
{
"term": {
"title": "love"
}
},
{
"term": {
"title": "china"
}
}
]
}
}
}
- 逻辑运算should和must和must_not
must:所有条件都必须满足
must_not:所有条件都必须不满足(文档只要匹配其中任意一个条件就会被排除)
should:至少满足一个就可以(如果同时存在must或filter,should默认只影响评分,可以用minimum_should_match指定至少要满足的个数)
{
"query": {
"bool": {
"must": [
{
"range": {
"crawl_time": {
"gte": "2021-03-29 00:00:00",
"lte": "2021-03-31 00:00:00"
}
}
},
{
"match_phrase": {
"author_id": "70577810926"
}
}
]
}
},
"sort": [
{
"crawl_time": {
"order": "desc"
}
}
]
}
- 不计算score的filter
使用 filter 来替代 must 查询,和must查询结果是一致的,差异仅是没有相关性得分,可以提高查询效率。建议了解下ES计算Score的查询实现。
{
"query": {
"bool": {
"filter": {
"term": {"id": "13"}
}
}
}
}
{
"query": {
"bool": {
"filter": [
{
"range": {
"ctime": {
"gte": "2021-03-28 00:00:00",
"lt": "2021-04-01 00:00:00"
}
}
},
{
"term": {
"verify_name": "山西大昌联品汽车销售服务有限公司"
}
}
]
}
}
}
// constant_score可以让filter按固定分数返回结果:所有匹配的文档得分都等于boost的值(本例为 2,不填时默认为 1)
{
"query": {
"constant_score": {
"filter": {
"match": {
"title": "火锅"
}
},
"boost": 2
}
}
}
- 多域匹配multi_match
fields中只要有一个字段匹配就算成功
{
"query": {
"multi_match": {
"query": "串串",
"fields": [ "title", "tag"]
}
}
}
- 查询字符串query_string
{
"query": {
"query_string": {
"query": "(水煮肉 AND 回锅肉) OR 西葫芦"
}
}
}
{
"query": {
"query_string": {
"query": "中国声音",
"analyzer": "ik_max_word",
"fields": ["name","content"]
}
}
}
- 查询Object类型
{
"query": {
"term": {
"language.v4.keyword": "Spanish"
}
}
}
- 查询内嵌类型
字段类型必须设置"type": "nested"
{
"query": {
"nested": {
"path": "series",
"query": {
"match": {
"series.series_name": "车"
}
}
}
}
}
- 查询Array的长度
GET /aweme-comment-dev/_search
{
"query": {
"bool": {
"filter": [
{
"script": {
"script": {
"source": "doc['hot_words.word'].length==2",
"lang": "painless"
}
}
}
]
}
}
}
- 精确获取Hit的数量
GET /aweme-comment-dev/_search
{
"track_total_hits": true,
"query": {
"bool": {
"filter": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
}
}
}
}
- 复杂Painless查询
GET /aweme-info-pro/_search?timeout=10000m
{
"query": {
"bool": {
"must": [
{
"range": {
"statistics_day": {
"gte": "2021-06-01",
"lte": "2021-08-01"
}
}
}
],
"filter": {
"script": {
"script": "if (doc['publish_time'].size() != 0 && doc['statistics_day'].size() !=0 ){ doc['statistics_day'].value.toLocalDate().isBefore(doc['publish_time'].value.toLocalDate())} else {return false}"
}
}
}
}
}
- 两个字段间比较
GET /fans-pro/_search
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "fans_id"
}
}
],
"filter": [
{
"script": {
"script": {
"source": "doc['_id'].value != doc['fans_id'].value",
"lang": "painless"
}
}
}
]
}
},
"sort": [
{
"mtime": {
"order": "desc"
}
}
]
}
- 只显示指定的字段 _source
GET /aweme-comment-dev/_search
{
"_source": "fans_id",
"query": {
"bool": {
"filter": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
}
}
}
}
- 通过includes和excludes指定需要和不需要的字段
{
"_source": {
"includes": [ "obj1.*", "obj2.*" ],
"excludes": [ "*.description" ]
},
"query": {
"term": {
"user.id": "kimchy"
}
}
}
- 其他请求参数
// 显示版本号
"version": true
// 显示评分的详细计算过程(同时会返回命中文档所在的分片和节点信息)
"explain": true
- 文档数量查询
GET /aweme-info-dev/_count
GET /author-info-dev/_count
{
"query": {
"term": {
"author_id": "96660564486"
}
}
}
- 聚合查询
Bucket可以理解为一个桶,它会遍历文档中的内容,凡是符合某一要求的就放在一个桶中,分桶相当于sql中的group by, 关键字有Terms Aggregation,Filter Aggregation,Histogram Aggregation, Date Aggregation。
GET /aweme-comment-dev/_search
{
"query": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
},
"size": 0,
"aggs": {
"count_fans": {
"value_count": {
"field": "fans_id"
}
}
}
}
GET /author-info-dev/_search
{
"query": {
"term": {
"author_id": "96660564486"
}
},
"size": 0,
"aggs": {
"crawl_time_stats": {
"stats": {
"field": "crawl_time"
}
}
}
}
常用的指标(Metric)聚合类型包含:
min: Computes the minimum value for a group of buckets.
max: Computes the maximum value for a group of buckets.
avg: Computes the average value for a group of buckets.
sum: Computes the sum of all the buckets.
value_count: Computes the count of values in the bucket.
stats: Computes all the base metrics such as the min, max, avg, count, and sum.
extended_stats: Computes the stats metric plus variance, standard deviation (std_deviation), bounds of standard deviation (std_deviation_bounds), and sum of squares (sum_of_squares).
percentiles: Computes the percentiles (the point at which a certain percentage of observed values occur) of some values (see Wikipedia at http://en.wikipedia.org/wiki/Percentile for more information about percentiles).
percentile_ranks: Computes the rank of values that hit a percentile range.
cardinality: Computes an approximate count of distinct values in a field.
geo_bounds: Computes the maximum geobounds in the document where the GeoPoints are.
geo_centroid: Computes the centroid in the document where GeoPoints are.
- 按值聚合
GET /aweme-info-dev/_search
{
"query": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
},
"size": 0,
"aggs": {
"aweme_id_agg": {
"terms": {
"field": "author_id",
"size": 100 # 按文档数量doc_count字段倒排序后并取前100个author_id
},
"aggs": {
"total_digg_count": {
"sum": {
"field": "digg_count_di"
}
},
"avg_digg_count": {
"avg": {
"field": "digg_count_di"
}
},
"agg_digg_count": {
"terms": {
"field": "digg_count_di"
}
}
}
}
}
}
修改terms的排序方式:"order": {"_count": "asc"}
指定terms中doc_count的最小数量:"min_doc_count": 3
指定terms中需要统计的值:"include": ["BMW", "Audi"]
GET /aweme-info-dev/_search
{
"size": 0,
"query": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
},
"aggs": {
"最小": {
"min": {
"field": "digg_count"
}
},
"最大": {
"max": {
"field": "digg_count"
}
},
"平均值": {
"avg": {
"field": "digg_count"
}
},
"求和": {
"sum": {
"field": "digg_count"
}
},
"去重count": {
"cardinality": {
"field": "author_id"
}
},
"分组去重": {
"terms": {
"field": "aweme_id",
"size": 10
},
"aggs": {
"组内去重": {
"cardinality": {
"field": "author_id"
}
},
"组内求和": {
"sum": {
"field": "digg_count"
}
}
}
}
}
}
- 按值范围聚合
GET /author-info-dev/_search
{
"track_total_hits": true,
"size": 0,
"aggs": {
"digg_distribute": {
"range": {
"field": "comment_count",
"ranges": [
{"to": 500},
{"from": 501, "to": 1000},
{"from": 1001}
]
}
}
}
}
GET /cars/cars/_search
{
"aggs": {
"range": {
"date_range": {
"field": "sellTime",
"format": "yyyy",
"ranges": [
{
"from": "2014",
"to": "2019"
}
]
}
}
}
}
- 直方图聚合(按值间隔聚合)
GET /cars/cars/_search
{
"aggs": {
"prices": {
"histogram": {
"field": "price",
"interval": 10000
}
}
}
}
GET /cars/cars/_search
{
"aggs": {
"sales_over_time": {
"date_histogram": {
"field": "sellTime",
"interval": "month",
"format": "yyyy-MM-dd"
}
}
}
}
GET /fans-follow-pro/_search
{
"query": {
"bool": {
"filter": {
"range": {
"crawl_time": {
"gte": "2021-04-12 00:00:00",
"lte": "2021-04-21 00:00:00"
}
}
}
}
},
"size": 0,
"aggs": {
"range": {
"date_histogram": {
"field": "crawl_time",
"format": "yyyy-MM-dd",
"interval": "day"
}
}
}
}
- 分组排序取第一
GET /aweme-info-dev/_search
{
"query": {
"range": {
"ctime": {
"gte": "2021-04-10 00:00:00",
"lte": "2021-04-10 23:59:59"
}
}
},
"size": 0,
"aggs": {
"aweme_id_agg": {
"terms": {
"field": "author_id",
"order": {
"_count": "asc"
},
"size": 10
},
"aggs": {
"NAME": {
"top_hits": {
"size": 1,
"sort": {"publish_time": "desc"}, # 按publish_time倒排序
"_source": ["publish_time", "ctime"]
}
}
}
}
}
}
GET /dongchedi-live-info-afanti-pro/_search
{
"query": {
"range": {
"crawl_time": {
"lte": "2021-08-19 00:00:00",
"gte": "2021-08-18 00:00:00"
}
}
},
"aggs": {
"NAME": {
"terms": {
"field": "room_id",
"size": 10,
"order": {
"_term": "asc"
}
},
"aggs": {
"NAME": {
"top_hits": {
"size": 1,
"sort": [
{
"crawl_time": {
"order": "desc"
}
}
],"_source": ["crawl_time", "room_id", "room_status", "finish_time"]
}
}
}
}
},
"size": 0
}
- Group by + Distinct + Count
GET /dongchedi-live-polling-afanti-pro/_search
{
"query": {
"range": {
"crawl_time": {
"lte": "2021-08-18 00:00:00",
"gte": "2021-08-01 00:00:00"
}
}
},
"aggs": {
"NAME": {
"date_histogram": {
"field": "crawl_time",
"interval": "day"
},"aggs": {
"doc_count": {
"cardinality": {
"field": "msg_id"
}
}
}
}
}
}
GET /dongchedi-live-polling-afanti-pro/_search
{
"query": {
"range": {
"crawl_time": {
"lte": "2021-09-23 00:00:00",
"gte": "2021-08-18 00:00:00"
}
}
},
"aggs": {
"NAME": {
"date_histogram": {
"field": "crawl_time",
"interval": "day"
},
"aggs": {
"abc": {
"cardinality": {
"precision_threshold": 40000, # 当统计结果小于配置的 precision_threshold,此时是准确的;反之,则统计结果可能会有误差。最大为40000。
"script": {
"source": "doc['room_id'].value + ' ' + doc['msg_id'].value"
}
}
}
}
}
}
}
- Group by + Having+Count + Distinct
GET /aweme-info-pro/_search
{
"size": 0,
"aggs": {
"NAME": {
"terms": {
"field": "item_id",
"size": 200,
"order": {
"amount": "desc"
}
},
"aggs": {
"amount": {
"cardinality": {
"field": "author_id"
}
},
"having": {
"bucket_selector": {
"buckets_path": {
"amount": "amount"
},
"script": {
"source": "params.amount >= 2 "
}
}
}
}
}
}
}
- Collapse实现Distinct
GET /author-info-dev/_search
{
"query": {
"term": {
"author_id": "96660564486"
}
},
"collapse": {
"field": "author_id"
}
}
- 通配符wildcard和模糊匹配fuzzy
wildcard和fuzzy在standard分词器都无法匹配多个汉字
GET /author-info-dev/_search
{
"query": {
"bool": {
"filter": {
"wildcard": {
"nickname": {
"value": "姐"
}
}
}
}
},
"_source": "nickname"
}
GET /author-info-dev/_search
{
"query": {
"bool": {
"filter": {
"wildcard": {
"nickname": {
"value": "*姐*"
}
}
}
}
},
"size": 0,
"aggs": {
"distinct_nickname": {
"terms": {
"field": "city_id",
"size": 100
}
}
}
}
GET /author-info-dev/_search
{
"query": {
"fuzzy": {
"signature":"老司机"
}
}
}
上面的fuzzy查询会报错:Cannot search on field [signature] since it is not indexed.(signature字段没有建立索引,所以无法搜索)
- 前缀查询Prefix
prefix在standard分词器也无法匹配多个汉字
GET /author-info-dev/_search
{
"query": {
"prefix": {
"author_id": {
"value": "2"
}
}
},
"_source": "author_id"
}
到了这里,关于ElasticSearch Index查询(Query DSL)的文章就介绍完了。