Convert a string to a dict in Python when the string has no quotes and uses = instead of :

I have dictionaries stored as strings. In these strings the keys and values have no quotes, and instead of the usual key-value format (key:value), the pairs are written as key=value. An example of such a string is shown below:

{created={type=FREEMIUM, title={value=Drool is love, drool is live..., _iscolumngrouppresent=true}, content={value=null, _iscolumngrouppresent=false}, status=PROCESSING, tags=[], attachments=[{payload_0={video_id=null, image_id=2efbff31-a0a6-4f4c-a163-667c4aabd111}}], visible_at={value=null, _iscolumngrouppresent=false}, author_user_id=8cfdf75d-5816-42f8-906f-8b203bb2c99f, _iscolumngrouppresent=true}, _iscolumngrouppresent=true}

Which I would like to convert to

{
    "created": {
        "type": "FREEMIUM", 
        "title": {
            "value": "Drool is love, drool is live...",
            "_iscolumngrouppresent": True
        }, 
        "content": {
            "value": None, 
            "_iscolumngrouppresent": False
        }, 
        "status": "PROCESSING", 
        "tags": [], 
        "attachments": [
            {
                "payload_0": {
                    "video_id": None, 
                    "image_id": "2efbff31-a0a6-4f4c-a163-667c4aabd111"
                }
            }
        ], 
        "visible_at": {
            "value": None, 
            "_iscolumngrouppresent": False
        }, 
        "author_user_id": "8cfdf75d-5816-42f8-906f-8b203bb2c99f", 
        "_iscolumngrouppresent": True
    },
    "_iscolumngrouppresent": True
}

I tried to parse the string myself, but there are cases where it fails, mainly when the value of a key-value pair is a string that contains commas (",").

Is there any existing tooling that I can use? Any ideas are more than welcome.

Thanks in advance



Solution 1:[1]

This parser works for your example:

class ParseError(Exception):
    """Signal that the input text could not be parsed.

    Carries a human-readable message as its only argument.
    """

class Parser:
    def __init__(self, data):
        self.data = data
        self.position = 0

    def parse(self, string):
        self.parse_dict()

    def parse_dict(self):
        self.consume('{')
        if self.current() == '}':
            self.next()
            return dict()
        key_values = self.parse_key_value_list()
        self.consume('}')
        return dict(key_values)

    def parse_key_value_list(self):
        res = []
        while True:
            res.append(self.parse_key_value())
            if not self.consume_comma_separator():
                break
        return res

    def consume_comma_separator(self):
        if self.current() != ',':
            return False
        self.consume(',')
        self.consume_whitespace()
        return True

    def parse_key_value(self):
        name = self.parse_name()
        self.consume('=')
        value = self.parse_value()
        return (name, value)

    def parse_name(self):
        if not self.current().isalpha() and self.current() != '_':
            self.error(f"Expected [a-zA-Z] or _ but found {self.current()}")

        res = self.current()
        self.next()

        res += self.accept_while(lambda c: c.isalpha() or c.isdigit() or c=='_')
        return res

    def parse_value(self):
        match self.current():
            case '[':
                return self.parse_array()
            case '{':
                return self.parse_dict()
            case 'n':
                return self.parse_null()
            case 't' | 'f':
                return self.parse_boolean()
            # case c if c.isdigit():
            #     return self.parse_integer()
            case _:
                return self.parse_string()

    def parse_array(self):
        self.consume('[')

        if self.current() == ']':
            self.next()
            return []

        res = []
        while True:
            res.append(self.parse_value())
            if not self.consume_comma_separator():
                break
        self.consume(']')
        return res

    def parse_null(self):
        self.consume("null")
        return None

    def parse_boolean(self):
        match self.current():
            case 't':
                self.consume('true')
                return True
            case 'f':
                self.consume('false')
                return False
            case _:
                self.error(f"Unexpected character {self.current()}")

    def parse_string(self):
        pos = self.position
        while self.data[pos] not in '=]}':
            pos += 1
        if self.data[pos] == '=':
            while self.data[pos] != ',':
                pos -= 1
        res = self.data[self.position : pos]
        self.position = pos
        return res
        # return self.accept_while(lambda c: c not in '=]}')

    def consume(self, string):
        for c in string:
            if self.current() != c:
                self.error(f"Expected '{c}'; found '{self.current()}")
            self.next()

    def consume_whitespace(self):
        while self.current().isspace():
            self.next()

    def next(self) -> str:
        self.position += 1

    def current(self) -> str:
        return self.data[self.position]

    def accept_while(self, predicate) -> str:
        res = ""
        while self.position < len(self.data):
            # print(f"{self.current()=}")
            if not predicate(self.current()):
                break
            res += self.current()
            self.next()
        return res

    def error(self, msg):
        raise ParseError(f"{msg} [position: {self.position}]")


def parse(data):
    """Parse the ``{key=value, ...}`` text in *data* and return the result.

    A fresh ``Parser`` is created for every call, and the outermost element
    is parsed as a dictionary.
    """
    parser = Parser(data)
    return parser.parse_dict()


# Sample input taken from the question.  Note the two spaces after the last
# top-level comma, which exercise whitespace skipping between pairs.
test_input = r"{created={type=FREEMIUM, title={value=Drool is love, drool is live..., _iscolumngrouppresent=true}, content={value=null, _iscolumngrouppresent=false}, status=PROCESSING, tags=[], attachments=[{payload_0={video_id=null, image_id=2efbff31-a0a6-4f4c-a163-667c4aabd111}}], visible_at={value=null, _iscolumngrouppresent=false}, author_user_id=8cfdf75d-5816-42f8-906f-8b203bb2c99f, _iscolumngrouppresent=true},  _iscolumngrouppresent=true}"
# Structure that parsing test_input is expected to produce.
test_output = {
    "created": {
        "type": "FREEMIUM",
        "title": {
            "value": "Drool is love, drool is live...",
            "_iscolumngrouppresent": True
        },
        "content": {
            "value": None,
            "_iscolumngrouppresent": False
        },
        "status": "PROCESSING",
        "tags": [],
        "attachments": [
            {
                "payload_0": {
                    "video_id": None,
                    "image_id": "2efbff31-a0a6-4f4c-a163-667c4aabd111"
                }
            }
        ],
        "visible_at": {
            "value": None,
            "_iscolumngrouppresent": False
        },
        "author_user_id": "8cfdf75d-5816-42f8-906f-8b203bb2c99f",
        "_iscolumngrouppresent": True
    },
    "_iscolumngrouppresent": True
}

def test():
    """Check that the sample input parses to the expected dictionary."""
    actual = parse(test_input)
    assert actual == test_output


# Run the self-check only when this file is executed as a script.
if __name__ == '__main__':
    test()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 md2perpe