'Elastic painless count unique occurrences
I'm using ELK stack version 7. What I need to do is count the unique occurrences of a value in my indexes.
My indexes are created by WSO2 Identity Server version 5.10 and are defined as follows:
{
"login.wso2.node.ip-2021.03.11" : {
"aliases" : {
"alias_my_login" : { }
},
"mappings" : {
"dynamic" : "true",
"_meta" : { },
"_source" : {
"includes" : [ ],
"excludes" : [ ]
},
"dynamic_date_formats" : [
"strict_date_optional_time",
"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
],
"dynamic_templates" : [ ],
"date_detection" : true,
"numeric_detection" : false,
"properties" : {
"@timestamp" : {
"type" : "date",
"format" : "strict_date_optional_time"
},
"@version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_IP" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"java_class" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"level" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log_message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"rr" : {
"type" : "text"
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tenant_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1615481578543",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "9o-UQnn-SKaj7LbhO8GYxQ",
"version" : {
"created" : "7070199"
},
"provided_name" : "login.wso2.node.ip-2021.03.11"
}
}
}
}
What I need to do is check whether the message field contains a SAML2 Response XML and, if so, extract one value from that XML and count its unique occurrences.
So far so good. The message field is a multi-field: it is mapped both as text and as keyword, so I can use the text type for full-text search and the keyword sub-field for aggregations, sorting and so on.
What I did is to write this painless script:
GET login.wso2.node.ip-2021.03.11/_search
{
"query": {
"bool": {
"filter": [
{
"script": {
"script": {
"source": "doc['message.keyword'].value.contains('SAML_MESSAGES_LOGFILE') && doc['message.keyword'].value.contains('TINIT-')"
}
}
}
]
}
},
"aggs": {
"distinct_cf_count": {
"scripted_metric": {
"params": {
"fieldName":"message"
},
"init_script": "state.list = []",
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 && doc[params.fieldName].value.indexOf('TINIT-') > -1 ){
def valore = doc[params.fieldName].value;
def startIdx = valore.indexOf('TINIT-')+'TINIT-'.length();
state.list.add(valore.substring(startIdx, 16));
}
""",
"combine_script": "return state.list;",
"reduce_script": """
Map uniqueValueMap = new HashMap();
int count = 0;
for(shardList in states) {
if(shardList != null) {
for(key in shardList) {
if(!uniqueValueMap.containsKey(key)) {
count +=1;
uniqueValueMap.put(key, key);
}
}
}
}
return count;
"""
}
}
}
}
But I can't use regexes because they are disabled, and I would have to restart my ELK cluster in order to enable them. So I tried contains and indexOf instead, but I'm not able to count the unique occurrences of this field.
Do you have any suggestion?
Thank you Angelo
EDIT MORE INFO
I had another look. This check always returns 0, so it is as if message.keyword were always missing:
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
**if( doc[params.fieldName].size()==0 ){**
** state.list.add(UUID.randomUUID().toString());**
** }**
//else{
// def valore = doc[params.fieldName].value;
// def cf = valore.splitOnToken('TINIT-')[1].substring(16);
// state.list.add(cf);
//}
""",
Do you have any suggestions? I'm really stuck here... just one step away from the solution.
Thank you
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
