'Search for a value in a nested dictionary python
Search for a value and get the parent dictionary names (keys):
Dictionary = {dict1:{
'part1': {
'.wbxml': 'application/vnd.wap.wbxml',
'.rl': 'application/resource-lists+xml',
},
'part2':
{'.wsdl': 'application/wsdl+xml',
'.rs': 'application/rls-services+xml',
'.xop': 'application/xop+xml',
'.svg': 'image/svg+xml',
},
'part3':{...}, ...
dict2:{
'part1': { '.dotx': 'application/vnd.openxmlformats-..'
'.zaz': 'application/vnd.zzazz.deck+xml',
'.xer': 'application/patch-ops-error+xml',}
},
'part2':{...},
'part3':{...},...
},...
In above dictionary I need to search values like: "image/svg+xml". Where, none of the values are repeated in the dictionary. How to search the "image/svg+xml"? so that it should return the parent keys in a dictionary { dict1:"part2" }.
Please note: Solutions should work unmodified for both Python 2.7 and Python 3.3.
Solution 1:[1]
Here's a simple recursive version:
def getpath(nested_dict, value, prepath=()):
for k, v in nested_dict.items():
path = prepath + (k,)
if v == value: # found value
return path
elif hasattr(v, 'items'): # v is a dict
p = getpath(v, value, path) # recursive call
if p is not None:
return p
Example:
print(getpath(dictionary, 'image/svg+xml'))
# -> ('dict1', 'part2', '.svg')
Solution 2:[2]
Here is a solution that works for a complex data structure of nested lists and dicts
import pprint
def search(d, search_pattern, prev_datapoint_path=''):
output = []
current_datapoint = d
current_datapoint_path = prev_datapoint_path
if type(current_datapoint) is dict:
for dkey in current_datapoint:
if search_pattern in str(dkey):
c = current_datapoint_path
c+="['"+dkey+"']"
output.append(c)
c = current_datapoint_path
c+="['"+dkey+"']"
for i in search(current_datapoint[dkey], search_pattern, c):
output.append(i)
elif type(current_datapoint) is list:
for i in range(0, len(current_datapoint)):
if search_pattern in str(i):
c = current_datapoint_path
c += "[" + str(i) + "]"
output.append(i)
c = current_datapoint_path
c+="["+ str(i) +"]"
for i in search(current_datapoint[i], search_pattern, c):
output.append(i)
elif search_pattern in str(current_datapoint):
c = current_datapoint_path
output.append(c)
output = filter(None, output)
return list(output)
if __name__ == "__main__":
d = {'dict1':
{'part1':
{'.wbxml': 'application/vnd.wap.wbxml',
'.rl': 'application/resource-lists+xml'},
'part2':
{'.wsdl': 'application/wsdl+xml',
'.rs': 'application/rls-services+xml',
'.xop': 'application/xop+xml',
'.svg': 'image/svg+xml'}},
'dict2':
{'part1':
{'.dotx': 'application/vnd.openxmlformats-..',
'.zaz': 'application/vnd.zzazz.deck+xml',
'.xer': 'application/patch-ops-error+xml'}}}
d2 = {
"items":
{
"item":
[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"}
]
},
"topping":
[
{"id": "5001", "type": "None"},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"}
]
},
...
]
}
}
pprint.pprint(search(d,'svg+xml','d'))
>> ["d['dict1']['part2']['.svg']"]
pprint.pprint(search(d2,'500','d2'))
>> ["d2['items']['item'][0]['topping'][0]['id']",
"d2['items']['item'][0]['topping'][1]['id']",
"d2['items']['item'][0]['topping'][2]['id']",
"d2['items']['item'][0]['topping'][3]['id']",
"d2['items']['item'][0]['topping'][4]['id']",
"d2['items']['item'][0]['topping'][5]['id']",
"d2['items']['item'][0]['topping'][6]['id']"]
Solution 3:[3]
Here are two similar quick and dirty ways of doing this type of operation. The function find_parent_dict1 uses list comprehension but if you are uncomfortable with that then find_parent_dict2 uses the infamous nested for loops.
Dictionary = {'dict1':{'part1':{'.wbxml':'1','.rl':'2'},'part2':{'.wbdl':'3','.rs':'4'}},'dict2':{'part3':{'.wbxml':'5','.rl':'6'},'part4':{'.wbdl':'1','.rs':'10'}}}
value = '3'
def find_parent_dict1(Dictionary):
for key1 in Dictionary.keys():
item = {key1:key2 for key2 in Dictionary[key1].keys() if value in Dictionary[key1][key2].values()}
if len(item)>0:
return item
find_parent_dict1(Dictionary)
def find_parent_dict2(Dictionary):
for key1 in Dictionary.keys():
for key2 in Dictionary[key1].keys():
if value in Dictionary[key1][key2].values():
print {key1:key2}
find_parent_dict2(Dictionary)
Solution 4:[4]
Traverses a nested dict looking for a particular value. When success is achieved the full key path to the value is printed. I left all the comments and print statements for pedagogical purposes (this isn't production code!)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 24 17:16:46 2022
@author: wellington
"""
class Tree(dict):
"""
allows autovivification as in Perl hashes
"""
def __missing__(self, key):
value = self[key] = type(self)()
return value
# tracking the key sequence when seeking the target
key_list = Tree()
# dict storing the target success result
success = Tree()
# example nested dict of dicts and lists
E = {
'AA':
{
'BB':
{'CC':
{
'DD':
{
'ZZ':'YY',
'WW':'PP'
},
'QQ':
{
'RR':'SS'
},
},
'II':
{
'JJ':'KK'
},
'LL':['MM', 'GG', 'TT']
}
}
}
def find_keys_from_value(data, target):
"""
recursive function -
given a value it returns all the keys in the path to that value within
the dict "data"
there are many paths and many false routes
at the end of a given path if success has not been achieved
the function discards keys to get back to the next possible path junction
"""
print(f"the number of keys in the local dict is {len(data)}")
key_counter = 0
for key in data:
key_counter += 1
# if target has been located stop iterating through keys
if success[target] == 1:
break
else:
# eliminate prior key from path that did not lead to success
if key_counter > 1:
k_list.pop()
# add key to new path
k_list.append(key)
print(f"printing k_list after append{k_list}")
# if target located set success[target] = 1 and exit
if key == target or data[key] == target:
key_list[target] = k_list
success[target] = 1
break
# if the target has not been located check to see if the value
# associated with the new key is a dict and if so return to the
# recursive function with the new dict as "data"
elif isinstance(data[key], dict):
print(f"\nvalue is dict\n {data[key]}")
find_keys_from_value(data[key], target)
# check to see if the value associated with the new key is a list
elif isinstance(data[key], list):
# print("\nv is list\n")
# search through the list
for i in data[key]:
# check to see if the list element is a dict
# and if so return to the recursive function with
# the new dict as "data
if isinstance(i, dict):
find_keys_from_value(i, target)
# check to see if each list element is the target
elif i == target:
print(f"list entry {i} is target")
success[target] = 1
key_list[target] = k_list
elif i != target:
print(f"list entry {i} is not target")
print(f"printing k_list before pop_b {k_list}")
print(f"popping off key_b {key}")
# so if value is not a key and not a list and not the target then
# discard the key from the key list
elif data[key] != target:
print(f"value {data[key]} is not target")
print(f"printing k_list before removing key_before {k_list}")
print(f"removing key_c {key}")
k_list.remove(key)
# select target values
values = ["PP", "SS", "KK", "TT"]
success = {}
for target in values:
print(f"\nlooking for target {target}")
success[target] = 0
k_list = []
find_keys_from_value(E, target)
print(f"\nprinting key_list for target {target}")
print(f"{key_list[target]}\n")
print("\n****************\n\n")
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | jfs |
| Solution 2 | |
| Solution 3 | BushMinusZero |
| Solution 4 | Do it right |
