'Having type error in python while converting json to excel

I am trying to convert json to excel file but I am having a type error and I am not been able to solve the error. The code is first converting json data into dictionary(which I need also for clips facts) than convert it into excel. Any help would be appreciated. Thank you in advance. Following is my code:

    from io import BytesIO
    import sys
    sys.path.append('/usr/lib/python3/dist-packages')
    import json
    import openpyxl
    from openpyxl import load_workbook
            
    import logging
    logger = logging.getLogger('ExtractJSON')
    
    
    class ExtractJSON(object):
    
    def __init__(self):
        """ Initialisierung """
        pass

    def convert(self, data):
        """ PDF Extract API """
        #jsondata = self.json2excel(data)
        jsondata = self.json2csv(data)
        return jsondata
        
    #=======================================================================
    #   JSON to Excel
    #=======================================================================
        
    def json2excel(self, data):
        """ JSON to Excel """
        header = ["Seite","ID","Text","Pfad","Fett","Kursiv","Unterstrichen","Versal","PT","Alignment","BlockAlign","InlineAlign","Col","Row","Width",\
                  "Height","LineHeight","SpaceAfter","Placement","Bemerkung","Bounds","ClipBounds","BBox","Font","HasClip","Lang"]
        jsondata = json.loads(data)
        elements = jsondata['elements']
        excelrow = 1
        sid = 0
        hd = 1
        wb = openpyxl.Workbook()
        sheet = wb.active
        
        for head in header:
            sheet.cell(excelrow, hd).value = head
            hd += 1

        for elm in elements:
            sid += 1
            excelrow += 1
            cl = 0
            row = self.createRow(elm, sid)
            if row is None:
                sheet.cell(excelrow, 1).value = "....."
                continue
            for column in header:  # TODO: enumerate!
                cl += 1
                sheet.cell(excelrow, cl).value = row.get(column, '?????')
            kids = elm.get('Kids', [])
            kid = 0
            for kidelm in kids:
                kid += 1
                excelrow += 1
                cl = 0
                sidstr = f"{sid}.{kid}"
                row = self.createRow(kidelm, sidstr)
                for column in header:  # TODO: enumerate! # TODO: refactor
                    cl += 1
                    sheet.cell(excelrow, cl).value = row.get(column, '?????')
        # FIXME: Check: Do we have recursive 'Kids' in the JSON file?
        wbresult = BytesIO()
        wb.save(wbresult)
        return wbresult.getvalue()

    def createRow(self, elm, sid):
        """ For Excel """
        try:
            page = elm.get('Page', '')
            if page or page == 0: 
                page += 1
            xpath = elm['Path']
            text = elm.get('Text', elm.get('text', ''))
            font = elm.get('Font', {})
            if not text and not font and xpath.find("Figure") > -1:  # TODO temporary
                logger.info("Figure found, no font and no text, continue..")
                return ""
            bold = "x" if font.get('weight', '') == 700 else ''
            italic = "x" if font.get('italic', '') else ''
            underline = font.get('underline', '')  # TODO No example yet
            versal = font.get('capital', '')  # TODO No example yet
            fontsize = elm.get('TextSize', '')
            if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
                fontsize = round(fontsize)
            attributes = elm.get('attributes', {})
            alignment = attributes.get('TextAlign', '')
            lineHeight = attributes.get('LineHeight', '')
            spaceAfter = attributes.get('SpaceAfter', '')
            placement = attributes.get('Placement', '')
            note = ""
            bounds = elm.get('Bounds', '')
            clipBounds = elm.get('ClipBounds', '')
            if bounds:
                bounds = ", ".join([str(round(bd)) for bd in bounds])
            if clipBounds:
                clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
            font = str(font)
            hasClip = elm.get('HasClip', '')
            lang = elm.get('Lang', '')
            # Tabelle
            blockAlign = attributes.get('BlockAlign', '')
            inlineAlign = attributes.get('InlineAlign', '')
            col = attributes.get('ColIndex', '')
            row = attributes.get('RowIndex', '')
            if col or col == 0: 
                col += 1
            if row or row == 0:
                row += 1
            width = attributes.get('width', '')
            height = attributes.get('height', '')
            bBox = attributes.get('BBox', '')
            if bBox:
                bBox = ", ".join([str(round(bd)) for bd in bBox])
            #
            result = {'Seite':page, 'ID':sid, 'Text':text, 'Pfad':xpath, 'Fett':bold, 'Kursiv':italic, 'Unterstrichen':underline,\
                     'Versal':versal, 'PT':fontsize, 'Alignment':alignment, 'BlockAlign':blockAlign, 'InlineAlign':inlineAlign,\
                     'Col':col, 'Row':row, 'Width':width, 'Height':height, 'LineHeight':lineHeight, 'SpaceAfter':spaceAfter,\
                     'Placement':placement, 'Bemerkung':note, 'Bounds':bounds, 'ClipBounds':clipBounds, 'BBox':bBox,\
                     'Font':font, 'HasClip':hasClip, 'Lang':lang}
        except Exception as e:
            logger.exception(e)
            logger.info(elm)
            return ""
        return result
        
    #=======================================================================
    #   JSON to CSV  (Alternative for JSON to Excel)
    #=======================================================================
        
    def json2csv(self, data):
        """ JSON to CSV """
        result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;"\
                 "Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
        jsondata = json.loads(data)
        elements = jsondata['elements']
        sid = 0
        for elm in elements:
            sid += 1
            result += self.createRow(elm, sid)
            kids = elm.get('Kids', [])
            kid = 0
            for kidelm in kids:
                kid += 1
                sidstr = f"{sid}.{kid}"
                result += self.createRowForCSV(kidelm, sidstr)
        filename=json2excel.csv")
        return result
        
    def createRowForCSV(self, elm, sid):
        """ """
        try:
            page = elm.get('Page', '')
            if page or page == 0: 
                page += 1
            xpath = elm['Path']
            text = elm.get('Text', elm.get('text', ''))
            font = elm.get('Font', {})
            if not text and not font and xpath.find("Figure") > -1:  # TODO temporary
                logger.info("Figure found, no font and no text, continue..")
                return ""
            bold = "x" if font.get('weight', '') == 700 else ''
            italic = "x" if font.get('italic', '') else ''
            underline = font.get('underline', '')  # TODO No example yet
            versal = font.get('capital', '')  # TODO No example yet
            fontsize = elm.get('TextSize', '')
            if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
                fontsize = round(fontsize)
            attributes = elm.get('attributes', {})
            alignment = attributes.get('TextAlign', '')
            lineHeight = attributes.get('LineHeight', '')
            spaceAfter = attributes.get('SpaceAfter', '')
            placement = attributes.get('Placement', '')
            note = ""
            bounds = elm.get('Bounds', '')
            clipBounds = elm.get('ClipBounds', '')
            if bounds:
                bounds = ", ".join([str(round(bd)) for bd in bounds])
            if clipBounds:
                clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
            font = str(font)
            hasClip = elm.get('HasClip', '')
            lang = elm.get('Lang', '')
            # Tabelle
            blockAlign = attributes.get('BlockAlign', '')
            inlineAlign = attributes.get('InlineAlign', '')
            col = attributes.get('ColIndex', '')
            row = attributes.get('RowIndex', '')
            if col or col == 0: 
                col += 1
            if row or row == 0:
                row += 1
            width = attributes.get('width', '')
            height = attributes.get('height', '')
            bBox = attributes.get('BBox', '')
            if bBox:
                bBox = ", ".join([str(round(bd)) for bd in bBox])
            #
            result = f"{page};'{sid}';'{text}';{xpath};{bold};{italic};{underline};{versal};{fontsize};{alignment};{blockAlign};{inlineAlign};{col};{row};\
                        {width};{height};'{lineHeight}';'{spaceAfter}';{placement};{note};{bounds};{clipBounds};{bBox};{font};{hasClip};{lang}\n"
        except Exception as e:
            logger.exception(e)
            logger.info(elm)
            return ""
        return result

INPUTPATH = "/home/abc/Clips/JSON/structuredData.json"
OUTPUTPATH = "/home/abc/Clips/JSON/result.xlsx"
        
if __name__ == '__main__':
    converter = ExtractJSON()
    f = open(INPUTPATH, "r"); data = f.read(); f.close()
    print ("JSON converting..")
    result = converter.convert(data)
    g = open(OUTPUTPATH, "wb"); g.write(result); g.close()
    print (f"Excel stored on {OUTPUTPATH}")
    sys.exit(0)

The error I am getting is:

JSON converting..
Traceback (most recent call last):
  File "/home/abc/Clips/JSON/json2excel.py", line 222, in <module>
    result = converter.convert(data)
  File "/home/abc/Clips/JSON/json2excel.py", line 23, in convert
    jsondata = self.json2csv(data)
  File "/home/abc/Clips/JSON/json2excel.py", line 146, in json2csv
    result += self.createRow(elm, sid)
TypeError: can only concatenate str (not "dict") to str


Solution 1:[1]

Long story short, this is the minimal-reproducible-example of your code:

class ExtractJSON:
    def createRowForCSV(self):
        return {
            'Seite':1, 'ID':2, 'Text':3, 'Pfad':4, 'Fett':5, 'Kursiv':6, 'Unterstrichen':7,
            'Versal':9, 'PT':0, 'Alignment':10, 'BlockAlign':11, 'InlineAlign':12,
            'Col':13, 'Row':14, 'Width':15, 'Height':16, 'LineHeight':17, 'SpaceAfter':18,
            'Placement':19, 'Bemerkung':20, 'Bounds':21, 'ClipBounds':22, 'BBox':23,
            'Font':2.4, 'HasClip':"twenty-five", 'Lang':"twenty-6"
        }

    def json2csv(self):
        result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
        result += self.createRowForCSV()
        return result

Output

Traceback (most recent call last):
  File "Main.py", line 28, in <module>
    print(ExtractJSON().json2csv())
  File "Main.py", line 17, in json2csv
    result += self.createRowForCSV()
TypeError: can only concatenate str (not "dict") to str

Now, we can clearly see that the reason of the error is that result is an str but yet you are appending to it the output of createRowForCSV which is a dict. To visualize this further, you are like doing:

"Some text" + {"another": "one"}

which obviously would fail.

Solution 1:

Manually iterate the dict and get the values in order.

class ExtractJSON:
    ...
    def json2csv(self):
        result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
        result_field_names = result.strip().split(';')  # Get the order of the keys

        result_dict = self.createRowForCSV()  # Get the row in dict format
        result_string = ";".join(map(str, [result_dict[key] for key in result_field_names]))  # Get the values in order and combine them into a string with ';' as the separator
        result += result_string + "\n"  # Append to the result

        return result
...

Solution 2:

Use csv.DictWriter to convert the response dictionary of createRowForCSV to a csv string.

import csv
from io import StringIO

class ExtractJSON:
    ...
    def json2csv(self):
        result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
        result_field_names = result.strip().split(';')  # Get the order of the keys

        result_dict = self.createRowForCSV()  # Get the row in dict format
        result_string = StringIO()  # Create the string that will hold the result
        csv.DictWriter(result_string, result_field_names, delimiter=';').writerow(result_dict)  # Convert the dict to str
        result_string.seek(0)  # Since result_string is a file-like object, go back to the beginning
        result += result_string.read()  # Read the result string from the beginning to end and append to the result

        return result
...

Output

>>> print(ExtractJSON().json2csv())
Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang
1;2;3;4;5;6;7;9;0;10;11;12;13;14;15;16;17;18;19;20;21;22;23;2.4;twenty-five;twenty-6

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1