ES截取内嵌字段分组

标签：count 内嵌 keyword eventProcessHistories doc 截取 value type ES

1 背景

需求：对 ES 内嵌（nested）字段的值，截取到第 n 个 @ 符号为止（包含该 @）的前缀，并按该前缀分组统计文档数量。该字段的取值格式形如：@xxx@yyy@zzz@、@aaa11@bbb222@ccc333@

2 映射

{
  "ads_hmap_event_info" : {
    "mappings" : {
      "properties" : {
        "CJSJ" : {
          "type" : "keyword"
        },
        "CXLX" : {
          "type" : "keyword"
        },
        "CZLX" : {
          "type" : "keyword"
        },
        "CZYJ" : {
          "type" : "keyword"
        },
        "DWJWD" : {
          "type" : "geo_point"
        },
        "DXLXBM" : {
          "type" : "keyword"
        },
        "DXLXMC" : {
          "type" : "keyword"
        },
        "DYCLRYPZID" : {
          "type" : "keyword"
        },
        "DYCLRYPZMC" : {
          "type" : "keyword"
        },
        "DYCLRYPZMCLJ" : {
          "type" : "keyword"
        },
        "FJBM" : {
          "type" : "keyword"
        },
        "FJMC" : {
          "type" : "keyword"
        },
        "FSDZ" : {
          "type" : "keyword"
        },
        "FSSJ" : {
          "type" : "keyword"
        },
        "GXSJ" : {
          "type" : "keyword"
        },
        "HBGD" : {
          "type" : "double"
        },
        "JD" : {
          "type" : "double"
        },
        "JDALBS" : {
          "type" : "integer"
        },
        "JKDBM" : {
          "type" : "keyword"
        },
        "JKDMC" : {
          "type" : "keyword"
        },
        "KZXX" : {
          "type" : "text"
        },
        "LCGXSJ" : {
          "type" : "keyword"
        },
        "LCKSSJ" : {
          "type" : "keyword"
        },
        "LCSLID" : {
          "type" : "keyword"
        },
        "LCZT" : {
          "type" : "keyword"
        },
        "LXDH" : {
          "type" : "keyword"
        },
        "MXBM" : {
          "type" : "keyword"
        },
        "MXMC" : {
          "type" : "keyword"
        },
        "MXSSBMDM" : {
          "type" : "keyword"
        },
        "MXSSBMMC" : {
          "type" : "keyword"
        },
        "PCSDM" : {
          "type" : "keyword"
        },
        "PCSMC" : {
          "type" : "keyword"
        },
        "QSJZSJ" : {
          "type" : "keyword"
        },
        "SBBM" : {
          "type" : "keyword"
        },
        "SBMC" : {
          "type" : "keyword"
        },
        "SFJZFJWJ" : {
          "type" : "integer"
        },
        "SFQSCS" : {
          "type" : "integer"
        },
        "SFWJCS" : {
          "type" : "integer"
        },
        "SJBH" : {
          "type" : "keyword"
        },
        "SJBT" : {
          "type" : "text"
        },
        "SJDJ" : {
          "type" : "keyword"
        },
        "SJDM" : {
          "type" : "keyword"
        },
        "SJDXBM" : {
          "type" : "keyword"
        },
        "SJDXMC" : {
          "type" : "keyword"
        },
        "SJLX" : {
          "type" : "keyword"
        },
        "SJLY" : {
          "type" : "keyword"
        },
        "SJMC" : {
          "type" : "keyword"
        },
        "SJTP" : {
          "type" : "keyword"
        },
        "SJXQ" : {
          "type" : "text"
        },
        "SJZT" : {
          "type" : "integer"
        },
        "SJ_ZJ" : {
          "type" : "keyword"
        },
        "SSXZQHDM" : {
          "type" : "keyword"
        },
        "SSXZQHMC" : {
          "type" : "keyword"
        },
        "WD" : {
          "type" : "double"
        },
        "WJJZSJ" : {
          "type" : "keyword"
        },
        "WJZT" : {
          "type" : "keyword"
        },
        "YJCFSJ" : {
          "type" : "keyword"
        },
        "ZXCLCZLX" : {
          "type" : "integer"
        },
        "ZXCLLCYJ" : {
          "type" : "keyword"
        },
        "ZXCLRBMID" : {
          "type" : "keyword"
        },
        "ZXCLRBMMC" : {
          "type" : "keyword"
        },
        "ZXCLRID" : {
          "type" : "keyword"
        },
        "ZXCLRMC" : {
          "type" : "keyword"
        },
        "ZXCLRYPZID" : {
          "type" : "keyword"
        },
        "ZXCLRYPZMC" : {
          "type" : "keyword"
        },
        "criticalValue" : {
          "type" : "long"
        },
        "eventProcessHistories" : {
          "type" : "nested",
          "properties" : {
            "alarmNo" : {
              "type" : "keyword"
            },
            "branchComplete" : {
              "type" : "integer"
            },
            "completeStatus" : {
              "type" : "keyword"
            },
            "createTime" : {
              "type" : "keyword"
            },
            "effectType" : {
              "type" : "keyword"
            },
            "endProcess" : {
              "type" : "integer"
            },
            "files" : {
              "type" : "keyword"
            },
            "isStatistics" : {
              "type" : "integer"
            },
            "modelNo" : {
              "type" : "keyword"
            },
            "operatorDeptId" : {
              "type" : "keyword"
            },
            "operatorDeptName" : {
              "type" : "keyword"
            },
            "operatorDeptTree" : {
              "type" : "keyword"
            },
            "operatorId" : {
              "type" : "keyword"
            },
            "operatorJudgeCode" : {
              "type" : "keyword"
            },
            "operatorJudgeName" : {
              "type" : "keyword"
            },
            "operatorJudgeTreeCode" : {
              "type" : "keyword"
            },
            "operatorJudgeTreeName" : {
              "type" : "keyword"
            },
            "operatorName" : {
              "type" : "keyword"
            },
            "operatorOpin" : {
              "type" : "keyword"
            },
            "operatorStatus" : {
              "type" : "integer"
            },
            "processId" : {
              "type" : "keyword"
            },
            "processTimeout" : {
              "type" : "integer"
            },
            "receiveDeptId" : {
              "type" : "keyword"
            },
            "receiveDeptName" : {
              "type" : "keyword"
            },
            "receiveDeptTreeCode" : {
              "type" : "keyword"
            },
            "receiveDeptTreeName" : {
              "type" : "keyword"
            },
            "receiveGroupTypeCode" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "receiveUserId" : {
              "type" : "keyword"
            },
            "receiveUserName" : {
              "type" : "keyword"
            }
          }
        },
        "hasUrged" : {
          "type" : "integer"
        }
      }
    }
  }
}

3 如果只是截取固定长度

如果需求是固定截取字段的前n个字符进行分组，可以直接用substring方法

注意先判断被截取字段非空，并且其长度不小于要截取的长度，否则 substring 会抛出越界异常

3.1 示例

{
	"size": 0,
	"query": {
		"bool": {
			"must": [
				{
					"nested": {
						"query": {
							"bool": {
								"must": [
									{
										"term": {
											"eventProcessHistories.isStatistics": {
												"value": 1,
												"boost": 1.0
											}
										}
									}
								],
								"adjust_pure_negative": true,
								"boost": 1.0
							}
						},
						"path": "eventProcessHistories",
						"ignore_unmapped": false,
						"score_mode": "none",
						"boost": 1.0
					}
				}
			],
			"filter": [
				{
					"range": {
						"YJCFSJ": {
							"gte": "2024-04-29T00:00:00.000Z",
							"lte": "2024-05-29T23:59:59.000Z",
							"boost": 1.0
						}
					}
				}
			],
			"adjust_pure_negative": true,
			"boost": 1.0
		}
	},
	"aggregations": {
		"nested_eventProcessHistories": {
			"nested": {
				"path": "eventProcessHistories"
			},
			"aggregations": {
				"group_by_receive_dept_tree_code": {
					"terms": {
						"script": {
							"lang": "painless",
							"params": {
								"prefixLength": 58
							},
							"source": "def codes = doc['eventProcessHistories.receiveDeptTreeCode']; if (codes.size() > 0 && codes.value.length() >= params.prefixLength) { return codes.value.substring(0, params.prefixLength); } return '';"
						},
						"size": 8,
						"min_doc_count": 1,
						"shard_min_doc_count": 0,
						"show_term_doc_count_error": false,
						"order": [
							{
								"_count": "desc"
							},
							{
								"_key": "asc"
							}
						]
					},
					"aggregations": {
						"group_by_alarm_no": {
							"cardinality": {
								"field": "eventProcessHistories.alarmNo"
							}
						},
						"handleAlarmNum_results": {
							"filter": {
								"bool": {
									"must": [
										{
											"terms": {
												"eventProcessHistories.operatorStatus": [
													-1,
													0
												],
												"boost": 1.0
											}
										},
										{
											"term": {
												"eventProcessHistories.endProcess": {
													"value": 0,
													"boost": 1.0
												}
											}
										}
									],
									"adjust_pure_negative": true,
									"boost": 1.0
								}
							},
							"aggregations": {
								"group_by_alarmNo_condition": {
									"cardinality": {
										"field": "eventProcessHistories.alarmNo"
									}
								}
							}
						},
						"noSignAlarmNum_results": {
							"filter": {
								"bool": {
									"must": [
										{
											"terms": {
												"eventProcessHistories.operatorStatus": [
													-2,
													1
												],
												"boost": 1.0
											}
										},
										{
											"term": {
												"eventProcessHistories.endProcess": {
													"value": 0,
													"boost": 1.0
												}
											}
										}
									],
									"adjust_pure_negative": true,
									"boost": 1.0
								}
							},
							"aggregations": {
								"group_by_alarmNo_condition": {
									"cardinality": {
										"field": "eventProcessHistories.alarmNo"
									}
								}
							}
						}
					}
				}
			}
		}
	}
}

4 截取第n个@符号之前字符

4.1 思路

将字段根据@符号先拆分，然后再组装起来

4.2 问题

Painless 脚本中不能调用 String 的 split 方法，否则会报 dynamic method [java.lang.String, split/1] not found 的错误

4.3 解决

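使用 Painless 的 /pattern/ 正则字面量（即 Pattern#split）分隔字符串。注意：正则功能受 script.painless.regex.enabled 设置控制，7.10 之前默认关闭，需要在 elasticsearch.yml 中设为 true；7.10 及之后默认为 limited，可直接使用。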

String#split is not whitelisted because it creates dynamic Pattern objects internally. Use Pattern#split instead like /\//.split(urlAsString[0]).

参考：

4.4 示例

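注意对被拆分的字段判断非空：query 中的 nested + exists 条件只用于筛选父文档，nested 聚合仍会遍历命中父文档下的全部内嵌子文档，其中可能包含该字段为空的子文档（对应返回结果中的 missing 分组），所以脚本里仍然要判空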

示例截取第3个@符号之前的字符进行分组

GET /ads_hmap_event_info/_search
{
	"size": 0,
	"query": {
		"bool": {
			"must": [
				{
					"nested": {
						"path": "eventProcessHistories",
						"query": {
							"bool": {
								"must": [
									{
										"term": {
											"eventProcessHistories.isStatistics": 1
										}
									},
									{
										"exists": {
											"field": "eventProcessHistories.receiveDeptTreeCode"
										}
									}
								]
							}
						}
					}
				},
				{
					"range": {
						"YJCFSJ": {
							"gte": "2024-04-29T00:00:00.000Z",
							"lte": "2024-05-29T23:59:59.000Z"
						}
					}
				}
			]
		}
	},
	"aggregations": {
		"nested_eventProcessHistories": {
			"nested": {
				"path": "eventProcessHistories"
			},
			"aggregations": {
				"group_by_receive_dept_tree_code": {
					"terms": {
						"script": {
							"lang": "painless",
							"params": {
								"n": 3
							},
							"source": """
								// Bucket key = the prefix of the value that ends at the n-th '@' (n = params.n).
								def field = doc['eventProcessHistories.receiveDeptTreeCode'];
								// Nested documents without the field go into a dedicated 'missing' bucket.
								if (field.empty) {
									return 'missing';
								}
								String code = field.value;
								int n = params.n;
								// Pattern#split keeps the empty string in front of a leading '@' and drops
								// trailing empty strings, e.g. '@a@b@c@' -> ['', 'a', 'b', 'c'].
								String[] parts = /@/.split(code);
								// Fewer than n-1 segments: use the whole value as the key instead of
								// failing the search with an array index out of bounds error.
								if (parts.length < n) {
									return code;
								}
								// parts[0] (the text before the first '@') is skipped on purpose;
								// values are assumed to start with '@'.
								String groupKey = '@';
								for (int i = 1; i < n; i++) {
									groupKey += parts[i] + '@';
								}
								return groupKey;
							"""
						},
						"size": 8,
						"min_doc_count": 1,
						"shard_min_doc_count": 0,
						"show_term_doc_count_error": false,
						"order": [
							{
								"_count": "desc"
							},
							{
								"_key": "asc"
							}
						]
					},
					"aggregations": {
						"group_by_alarm_no": {
							"cardinality": {
								"field": "eventProcessHistories.alarmNo"
							}
						},
						"handleAlarmNum_results": {
							"filter": {
								"bool": {
									"must": [
										{
											"terms": {
												"eventProcessHistories.operatorStatus": [
													-1,
													0
												]
											}
										},
										{
											"term": {
												"eventProcessHistories.endProcess": 0
											}
										}
									]
								}
							},
							"aggregations": {
								"group_by_alarmNo_condition": {
									"cardinality": {
										"field": "eventProcessHistories.alarmNo"
									}
								}
							}
						},
						"noSignAlarmNum_results": {
							"filter": {
								"bool": {
									"must": [
										{
											"terms": {
												"eventProcessHistories.operatorStatus": [
													-2,
													1
												]
											}
										},
										{
											"term": {
												"eventProcessHistories.endProcess": 0
											}
										}
									]
								}
							},
							"aggregations": {
								"group_by_alarmNo_condition": {
									"cardinality": {
										"field": "eventProcessHistories.alarmNo"
									}
								}
							}
						}
					}
				}
			}
		}
	}
}

4.5 返回

{
  "took" : 87,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 32,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "nested_eventProcessHistories" : {
      "doc_count" : 54,
      "group_by_receive_dept_tree_code" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "@330100000000@1961017e22c885cc5e8f03cc62cf4e3d4LfvGPDWWwt@",
            "doc_count" : 38,
            "group_by_alarm_no" : {
              "value" : 31
            },
            "noSignAlarmNum_results" : {
              "doc_count" : 26,
              "group_by_alarmNo_condition" : {
                "value" : 26
              }
            },
            "handleAlarmNum_results" : {
              "doc_count" : 2,
              "group_by_alarmNo_condition" : {
                "value" : 2
              }
            }
          },
          {
            "key" : "@330100000000@330108000000@",
            "doc_count" : 9,
            "group_by_alarm_no" : {
              "value" : 2
            },
            "noSignAlarmNum_results" : {
              "doc_count" : 0,
              "group_by_alarmNo_condition" : {
                "value" : 0
              }
            },
            "handleAlarmNum_results" : {
              "doc_count" : 0,
              "group_by_alarmNo_condition" : {
                "value" : 0
              }
            }
          },
          {
            "key" : "missing",
            "doc_count" : 7,
            "group_by_alarm_no" : {
              "value" : 4
            },
            "noSignAlarmNum_results" : {
              "doc_count" : 0,
              "group_by_alarmNo_condition" : {
                "value" : 0
              }
            },
            "handleAlarmNum_results" : {
              "doc_count" : 0,
              "group_by_alarmNo_condition" : {
                "value" : 0
              }
            }
          }
        ]
      }
    }
  }
}

5 其他

5.1 报错

与当前主题无关的一个报错：如果需要过滤出该内嵌字段存在且不为空的文档，在 script 查询里直接写 source 会报 [script] query does not support [source] 的错误。原因是 script 查询（script_score 也类似）要求把脚本写在内层的 script 对象中，少了这一层就会报错。

参考：

https://stackoverflow.com/questions/50969666/script-score-query-does-not-support-source

https://discuss.elastic.co/t/script-query-does-not-support-params/116631

5.1.1 错误写法


"filter": {
						"bool": {
							"must": [
								{
									"exists": {
										"field": "eventProcessHistories.receiveDeptTreeCode.keyword"
									}
								},
								{
									"script": {
										"source": "def value = doc['eventProcessHistories.receiveDeptTreeCode.keyword'].value; value != null && value.trim().length() > 0 && value.split('@').length > 1;"
									}
								}
							]
						}
					}

5.1.2 修改

在 script 查询里面再包一层 script 对象，把 source 放到内层

"filter": {
						"bool": {
							"must": [
								{
									"exists": {
										"field": "eventProcessHistories.receiveDeptTreeCode.keyword"
									}
								},
								{
									"script": {
										"script": {
											"lang": "painless",
											"source": "def codes = doc['eventProcessHistories.receiveDeptTreeCode.keyword']; if (codes.size() == 0) { return false; } String code = codes.value.trim(); return code.length() > 0 && /@/.split(code).length > 1;"
										}
									}
								}
							]
						}
					}

标签：count,内嵌,keyword,eventProcessHistories,doc,截取,value,type,ES
From： https://www.cnblogs.com/charlton/p/18228393

ES截取内嵌字段分组

ES截取内嵌字段分组

1 背景

2 映射

3 如果只是截取固定长度

3.1 示例

4 截取第n个@符号之前字符

4.1 思路

4.2 问题

4.3 解决

4.4 示例

4.5 返回

5 其他

5.1 报错

5.1.1 错误写法

5.1.2 修改

相关文章

赞助商

阅读排行