Convert string to dict in Python, but the string dict has no quotes and uses = instead of :
I have dictionaries as strings. In these strings the keys and values have no quotes, and instead of the usual key-value format (key:value), the pairs are written as key=value. An example of such a string is shown below:
{created={type=FREEMIUM, title={value=Drool is love, drool is live..., _iscolumngrouppresent=true}, content={value=null, _iscolumngrouppresent=false}, status=PROCESSING, tags=[], attachments=[{payload_0={video_id=null, image_id=2efbff31-a0a6-4f4c-a163-667c4aabd111}}], visible_at={value=null, _iscolumngrouppresent=false}, author_user_id=8cfdf75d-5816-42f8-906f-8b203bb2c99f, _iscolumngrouppresent=true}, _iscolumngrouppresent=true}
Which I would like to convert to:
{
"created": {
"type": "FREEMIUM",
"title": {
"value": "Drool is love, drool is live...",
"_iscolumngrouppresent": True
},
"content": {
"value": None,
"_iscolumngrouppresent": False
},
"status": "PROCESSING",
"tags": [],
"attachments": [
{
"payload_0": {
"video_id": None,
"image_id": "2efbff31-a0a6-4f4c-a163-667c4aabd111"
}
}
],
"visible_at": {
"value": None,
"_iscolumngrouppresent": False
},
"author_user_id": "8cfdf75d-5816-42f8-906f-8b203bb2c99f",
"_iscolumngrouppresent": True
},
"_iscolumngrouppresent": True
}
I tried to parse the string myself, but there are cases where it fails, mainly when the value of a key=value pair is a string with commas in it (",").
Is there any existing tooling that I can use? Any ideas are more than welcome.
Thanks in advance.
Solution 1:[1]
This parser works for your example:
class ParseError(Exception):
    """Signals that the input text does not match the expected syntax."""
class Parser:
def __init__(self, data):
self.data = data
self.position = 0
def parse(self, string):
self.parse_dict()
def parse_dict(self):
self.consume('{')
if self.current() == '}':
self.next()
return dict()
key_values = self.parse_key_value_list()
self.consume('}')
return dict(key_values)
def parse_key_value_list(self):
res = []
while True:
res.append(self.parse_key_value())
if not self.consume_comma_separator():
break
return res
def consume_comma_separator(self):
if self.current() != ',':
return False
self.consume(',')
self.consume_whitespace()
return True
def parse_key_value(self):
name = self.parse_name()
self.consume('=')
value = self.parse_value()
return (name, value)
def parse_name(self):
if not self.current().isalpha() and self.current() != '_':
self.error(f"Expected [a-zA-Z] or _ but found {self.current()}")
res = self.current()
self.next()
res += self.accept_while(lambda c: c.isalpha() or c.isdigit() or c=='_')
return res
def parse_value(self):
match self.current():
case '[':
return self.parse_array()
case '{':
return self.parse_dict()
case 'n':
return self.parse_null()
case 't' | 'f':
return self.parse_boolean()
# case c if c.isdigit():
# return self.parse_integer()
case _:
return self.parse_string()
def parse_array(self):
self.consume('[')
if self.current() == ']':
self.next()
return []
res = []
while True:
res.append(self.parse_value())
if not self.consume_comma_separator():
break
self.consume(']')
return res
def parse_null(self):
self.consume("null")
return None
def parse_boolean(self):
match self.current():
case 't':
self.consume('true')
return True
case 'f':
self.consume('false')
return False
case _:
self.error(f"Unexpected character {self.current()}")
def parse_string(self):
pos = self.position
while self.data[pos] not in '=]}':
pos += 1
if self.data[pos] == '=':
while self.data[pos] != ',':
pos -= 1
res = self.data[self.position : pos]
self.position = pos
return res
# return self.accept_while(lambda c: c not in '=]}')
def consume(self, string):
for c in string:
if self.current() != c:
self.error(f"Expected '{c}'; found '{self.current()}")
self.next()
def consume_whitespace(self):
while self.current().isspace():
self.next()
def next(self) -> str:
self.position += 1
def current(self) -> str:
return self.data[self.position]
def accept_while(self, predicate) -> str:
res = ""
while self.position < len(self.data):
# print(f"{self.current()=}")
if not predicate(self.current()):
break
res += self.current()
self.next()
return res
def error(self, msg):
raise ParseError(f"{msg} [position: {self.position}]")
def parse(data):
    """Build a Parser over ``data`` and return the dict it parses."""
    parser = Parser(data)
    return parser.parse_dict()
# Sample text in the unquoted key=value notation, split by top-level field.
test_input = (
    r"{created={type=FREEMIUM, "
    r"title={value=Drool is love, drool is live..., _iscolumngrouppresent=true}, "
    r"content={value=null, _iscolumngrouppresent=false}, "
    r"status=PROCESSING, tags=[], "
    r"attachments=[{payload_0={video_id=null, image_id=2efbff31-a0a6-4f4c-a163-667c4aabd111}}], "
    r"visible_at={value=null, _iscolumngrouppresent=false}, "
    r"author_user_id=8cfdf75d-5816-42f8-906f-8b203bb2c99f, "
    r"_iscolumngrouppresent=true}, "
    r"_iscolumngrouppresent=true}"
)

# The Python structure that the sample text above should be converted to.
test_output = {
    "created": {
        "type": "FREEMIUM",
        "title": {
            "value": "Drool is love, drool is live...",
            "_iscolumngrouppresent": True,
        },
        "content": {
            "value": None,
            "_iscolumngrouppresent": False,
        },
        "status": "PROCESSING",
        "tags": [],
        "attachments": [
            {
                "payload_0": {
                    "video_id": None,
                    "image_id": "2efbff31-a0a6-4f4c-a163-667c4aabd111",
                },
            },
        ],
        "visible_at": {
            "value": None,
            "_iscolumngrouppresent": False,
        },
        "author_user_id": "8cfdf75d-5816-42f8-906f-8b203bb2c99f",
        "_iscolumngrouppresent": True,
    },
    "_iscolumngrouppresent": True,
}
def test():
    """Check that parsing the sample input yields the expected structure."""
    parsed = parse(test_input)
    assert parsed == test_output
# Run the self-check only when this file is executed as a script.
if __name__ == "__main__":
    test()
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | md2perpe |
