ES截取内嵌字段分组
1 背景
根据ES内嵌字段中的@符号,截取第n个@符号之前的字符,并按截取结果分组统计文档数量。该字段的格式形如:@xxx@yyy@zzz@、@aaa11@bbb222@ccc333@
2 映射
{
"ads_hmap_event_info" : {
"mappings" : {
"properties" : {
"CJSJ" : {
"type" : "keyword"
},
"CXLX" : {
"type" : "keyword"
},
"CZLX" : {
"type" : "keyword"
},
"CZYJ" : {
"type" : "keyword"
},
"DWJWD" : {
"type" : "geo_point"
},
"DXLXBM" : {
"type" : "keyword"
},
"DXLXMC" : {
"type" : "keyword"
},
"DYCLRYPZID" : {
"type" : "keyword"
},
"DYCLRYPZMC" : {
"type" : "keyword"
},
"DYCLRYPZMCLJ" : {
"type" : "keyword"
},
"FJBM" : {
"type" : "keyword"
},
"FJMC" : {
"type" : "keyword"
},
"FSDZ" : {
"type" : "keyword"
},
"FSSJ" : {
"type" : "keyword"
},
"GXSJ" : {
"type" : "keyword"
},
"HBGD" : {
"type" : "double"
},
"JD" : {
"type" : "double"
},
"JDALBS" : {
"type" : "integer"
},
"JKDBM" : {
"type" : "keyword"
},
"JKDMC" : {
"type" : "keyword"
},
"KZXX" : {
"type" : "text"
},
"LCGXSJ" : {
"type" : "keyword"
},
"LCKSSJ" : {
"type" : "keyword"
},
"LCSLID" : {
"type" : "keyword"
},
"LCZT" : {
"type" : "keyword"
},
"LXDH" : {
"type" : "keyword"
},
"MXBM" : {
"type" : "keyword"
},
"MXMC" : {
"type" : "keyword"
},
"MXSSBMDM" : {
"type" : "keyword"
},
"MXSSBMMC" : {
"type" : "keyword"
},
"PCSDM" : {
"type" : "keyword"
},
"PCSMC" : {
"type" : "keyword"
},
"QSJZSJ" : {
"type" : "keyword"
},
"SBBM" : {
"type" : "keyword"
},
"SBMC" : {
"type" : "keyword"
},
"SFJZFJWJ" : {
"type" : "integer"
},
"SFQSCS" : {
"type" : "integer"
},
"SFWJCS" : {
"type" : "integer"
},
"SJBH" : {
"type" : "keyword"
},
"SJBT" : {
"type" : "text"
},
"SJDJ" : {
"type" : "keyword"
},
"SJDM" : {
"type" : "keyword"
},
"SJDXBM" : {
"type" : "keyword"
},
"SJDXMC" : {
"type" : "keyword"
},
"SJLX" : {
"type" : "keyword"
},
"SJLY" : {
"type" : "keyword"
},
"SJMC" : {
"type" : "keyword"
},
"SJTP" : {
"type" : "keyword"
},
"SJXQ" : {
"type" : "text"
},
"SJZT" : {
"type" : "integer"
},
"SJ_ZJ" : {
"type" : "keyword"
},
"SSXZQHDM" : {
"type" : "keyword"
},
"SSXZQHMC" : {
"type" : "keyword"
},
"WD" : {
"type" : "double"
},
"WJJZSJ" : {
"type" : "keyword"
},
"WJZT" : {
"type" : "keyword"
},
"YJCFSJ" : {
"type" : "keyword"
},
"ZXCLCZLX" : {
"type" : "integer"
},
"ZXCLLCYJ" : {
"type" : "keyword"
},
"ZXCLRBMID" : {
"type" : "keyword"
},
"ZXCLRBMMC" : {
"type" : "keyword"
},
"ZXCLRID" : {
"type" : "keyword"
},
"ZXCLRMC" : {
"type" : "keyword"
},
"ZXCLRYPZID" : {
"type" : "keyword"
},
"ZXCLRYPZMC" : {
"type" : "keyword"
},
"criticalValue" : {
"type" : "long"
},
"eventProcessHistories" : {
"type" : "nested",
"properties" : {
"alarmNo" : {
"type" : "keyword"
},
"branchComplete" : {
"type" : "integer"
},
"completeStatus" : {
"type" : "keyword"
},
"createTime" : {
"type" : "keyword"
},
"effectType" : {
"type" : "keyword"
},
"endProcess" : {
"type" : "integer"
},
"files" : {
"type" : "keyword"
},
"isStatistics" : {
"type" : "integer"
},
"modelNo" : {
"type" : "keyword"
},
"operatorDeptId" : {
"type" : "keyword"
},
"operatorDeptName" : {
"type" : "keyword"
},
"operatorDeptTree" : {
"type" : "keyword"
},
"operatorId" : {
"type" : "keyword"
},
"operatorJudgeCode" : {
"type" : "keyword"
},
"operatorJudgeName" : {
"type" : "keyword"
},
"operatorJudgeTreeCode" : {
"type" : "keyword"
},
"operatorJudgeTreeName" : {
"type" : "keyword"
},
"operatorName" : {
"type" : "keyword"
},
"operatorOpin" : {
"type" : "keyword"
},
"operatorStatus" : {
"type" : "integer"
},
"processId" : {
"type" : "keyword"
},
"processTimeout" : {
"type" : "integer"
},
"receiveDeptId" : {
"type" : "keyword"
},
"receiveDeptName" : {
"type" : "keyword"
},
"receiveDeptTreeCode" : {
"type" : "keyword"
},
"receiveDeptTreeName" : {
"type" : "keyword"
},
"receiveGroupTypeCode" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"receiveUserId" : {
"type" : "keyword"
},
"receiveUserName" : {
"type" : "keyword"
}
}
},
"hasUrged" : {
"type" : "integer"
}
}
}
}
}
3 如果只是截取固定长度
如果需求是固定截取字段的前n个字符进行分组,可以直接用substring方法
- 注意先判断被截取字段非空
3.1 示例
{
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"eventProcessHistories.isStatistics": {
"value": "1",
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"path": "eventProcessHistories",
"ignore_unmapped": false,
"score_mode": "none",
"boost": 1.0
}
}
],
"filter": [
{
"range": {
"YJCFSJ": {
"from": "2024-04-29T00:00:00.000Z",
"to": "2024-05-29T23:59:59.000Z",
"include_lower": true,
"include_upper": true,
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"aggregations": {
"nested_eventProcessHistories": {
"nested": {
"path": "eventProcessHistories"
},
"aggregations": {
"group_by_receive_dept_tree_code": {
"terms": {
"script": {
"source": "if (doc['eventProcessHistories.receiveDeptTreeCode'].size() > 0 && doc['eventProcessHistories.receiveDeptTreeCode'].value.length() >= 58) { doc['eventProcessHistories.receiveDeptTreeCode'].value.substring(0, 58) } else { '' }",
"lang": "painless"
},
"size": 8,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"group_by_alarm_no": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
},
"handleAlarmNum_results": {
"filter": {
"bool": {
"must": [
{
"terms": {
"eventProcessHistories.operatorStatus": [
-1,
0
],
"boost": 1.0
}
},
{
"term": {
"eventProcessHistories.endProcess": {
"value": 0,
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"aggregations": {
"group_by_alarmNo_condition": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
}
}
},
"noSignAlarmNum_results": {
"filter": {
"bool": {
"must": [
{
"terms": {
"eventProcessHistories.operatorStatus": [
-2,
1
],
"boost": 1.0
}
},
{
"term": {
"eventProcessHistories.endProcess": {
"value": 0,
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"aggregations": {
"group_by_alarmNo_condition": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
}
}
}
}
}
}
}
}
}
4 截取第n个@符号之前字符
4.1 思路
将字段根据@符号先拆分,然后再组装起来
4.2 问题
ES的Painless脚本不支持直接使用String的split方法,会报 dynamic method [java.lang.String, split/1] not found 的错。
4.3 解决
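使用Painless的 /pattern/ 正则字面量(即 Pattern#split)来分隔字符串。相关说明(出处见下方参考链接):
String#split is not whitelisted because it creates dynamic Pattern objects internally. Use Pattern#split instead, like /\//.split(urlAsString[0]).
注意:ES 7.10 之前的版本默认禁用Painless正则,需要在 elasticsearch.yml 中设置 script.painless.regex.enabled: true 后才能使用。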
参考:
- https://github.com/elastic/elasticsearch/issues/26338
- https://discuss.elastic.co/t/can-java-util-regex-pattern-be-used-in-a-painless-script/213789
- https://www.elastic.co/guide/en/elasticsearch/painless/7.5/painless-walkthrough.html#modules-scripting-painless-regex
4.4 示例
- 注意对被拆分的字段判断非空
- 示例截取第3个@符号之前的字符进行分组
GET /ads_hmap_event_info/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"path": "eventProcessHistories",
"query": {
"bool": {
"must": [
{
"term": {
"eventProcessHistories.isStatistics": "1"
}
},
{
"exists": {
"field": "eventProcessHistories.receiveDeptTreeCode"
}
}
]
}
}
}
},
{
"range": {
"YJCFSJ": {
"from": "2024-04-29T00:00:00.000Z",
"to": "2024-05-29T23:59:59.000Z",
"include_lower": true,
"include_upper": true
}
}
}
]
}
},
"aggregations": {
"nested_eventProcessHistories": {
"nested": {
"path": "eventProcessHistories"
},
"aggregations": {
"group_by_receive_dept_tree_code": {
"terms": {
"script": {
"source": """
// Build the bucket key: everything up to and including the 3rd '@'
// of receiveDeptTreeCode (values look like @xxx@yyy@zzz@).
def codes = doc['eventProcessHistories.receiveDeptTreeCode'];
// Nested docs without the field go into a dedicated 'missing' bucket.
if (codes.empty) { return 'missing'; }
def str = codes.value;
// Splitting '@xxx@yyy@zzz@' on '@' yields ['', 'xxx', 'yyy', 'zzz']:
// index 0 is the empty string before the leading '@'.
def parts = /@/.split(str);
// Values with fewer than two levels (e.g. '@xxx@') would make parts[2]
// throw an out-of-bounds error and fail the whole aggregation, so group
// such shallow values under their own full code instead.
if (parts.length < 3) { return str; }
return '@' + parts[1] + '@' + parts[2] + '@';
"""
},
"size": 8,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"group_by_alarm_no": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
},
"handleAlarmNum_results": {
"filter": {
"bool": {
"must": [
{
"terms": {
"eventProcessHistories.operatorStatus": [
-1,
0
]
}
},
{
"term": {
"eventProcessHistories.endProcess": 0
}
}
]
}
},
"aggregations": {
"group_by_alarmNo_condition": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
}
}
},
"noSignAlarmNum_results": {
"filter": {
"bool": {
"must": [
{
"terms": {
"eventProcessHistories.operatorStatus": [
-2,
1
]
}
},
{
"term": {
"eventProcessHistories.endProcess": 0
}
}
]
}
},
"aggregations": {
"group_by_alarmNo_condition": {
"cardinality": {
"field": "eventProcessHistories.alarmNo"
}
}
}
}
}
}
}
}
}
}
4.5 返回
{
"took" : 87,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 32,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"nested_eventProcessHistories" : {
"doc_count" : 54,
"group_by_receive_dept_tree_code" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "@330100000000@1961017e22c885cc5e8f03cc62cf4e3d4LfvGPDWWwt@",
"doc_count" : 38,
"group_by_alarm_no" : {
"value" : 31
},
"noSignAlarmNum_results" : {
"doc_count" : 26,
"group_by_alarmNo_condition" : {
"value" : 26
}
},
"handleAlarmNum_results" : {
"doc_count" : 2,
"group_by_alarmNo_condition" : {
"value" : 2
}
}
},
{
"key" : "@330100000000@330108000000@",
"doc_count" : 9,
"group_by_alarm_no" : {
"value" : 2
},
"noSignAlarmNum_results" : {
"doc_count" : 0,
"group_by_alarmNo_condition" : {
"value" : 0
}
},
"handleAlarmNum_results" : {
"doc_count" : 0,
"group_by_alarmNo_condition" : {
"value" : 0
}
}
},
{
"key" : "missing",
"doc_count" : 7,
"group_by_alarm_no" : {
"value" : 4
},
"noSignAlarmNum_results" : {
"doc_count" : 0,
"group_by_alarmNo_condition" : {
"value" : 0
}
},
"handleAlarmNum_results" : {
"doc_count" : 0,
"group_by_alarmNo_condition" : {
"value" : 0
}
}
}
]
}
}
}
}
5 其他
5.1 报错
这是一个和当前主题不相关的报错:如果需要过滤出该内嵌字段存在且不为空的文档,在使用script查询时,会报
[script] query does not support [source]
的错,实际上是因为script查询中少了一层script(脚本)部分。参考下面的写法:
5.1.1 错误写法
"filter": {
"bool": {
"must": [
{
"exists": {
"field": "eventProcessHistories.receiveDeptTreeCode.keyword"
}
},
{
"script": {
"source": "def value = doc['eventProcessHistories.receiveDeptTreeCode.keyword'].value; value != null && value.trim().length() > 0 && value.split('@').length > 1;"
}
}
]
}
}
5.1.2 修改
里面多加一层
"filter": {
"bool": {
"must": [
{
"exists": {
"field": "eventProcessHistories.receiveDeptTreeCode.keyword"
}
},
{
"script": {
"script": {
"source": "def value = doc['eventProcessHistories.receiveDeptTreeCode.keyword'].value; value != null && value.trim().length() > 0 && value.split('@').length > 1;"
}
}
}
]
}
}
标签:count,内嵌,keyword,eventProcessHistories,doc,截取,value,type,ES
From: https://www.cnblogs.com/charlton/p/18228393