'Having type error in python while converting json to excel
I am trying to convert json to excel file but I am having a type error and I am not been able to solve the error. The code is first converting json data into dictionary(which I need also for clips facts) than convert it into excel. Any help would be appreciated. Thank you in advance. Following is my code:
from io import BytesIO
import sys
sys.path.append('/usr/lib/python3/dist-packages')
import json
import openpyxl
from openpyxl import load_workbook
import logging
logger = logging.getLogger('ExtractJSON')
class ExtractJSON(object):
def __init__(self):
""" Initialisierung """
pass
def convert(self, data):
""" PDF Extract API """
#jsondata = self.json2excel(data)
jsondata = self.json2csv(data)
return jsondata
#=======================================================================
# JSON to Excel
#=======================================================================
def json2excel(self, data):
""" JSON to Excel """
header = ["Seite","ID","Text","Pfad","Fett","Kursiv","Unterstrichen","Versal","PT","Alignment","BlockAlign","InlineAlign","Col","Row","Width",\
"Height","LineHeight","SpaceAfter","Placement","Bemerkung","Bounds","ClipBounds","BBox","Font","HasClip","Lang"]
jsondata = json.loads(data)
elements = jsondata['elements']
excelrow = 1
sid = 0
hd = 1
wb = openpyxl.Workbook()
sheet = wb.active
for head in header:
sheet.cell(excelrow, hd).value = head
hd += 1
for elm in elements:
sid += 1
excelrow += 1
cl = 0
row = self.createRow(elm, sid)
if row is None:
sheet.cell(excelrow, 1).value = "....."
continue
for column in header: # TODO: enumerate!
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
excelrow += 1
cl = 0
sidstr = f"{sid}.{kid}"
row = self.createRow(kidelm, sidstr)
for column in header: # TODO: enumerate! # TODO: refactor
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
# FIXME: Check: Do we have recursive 'Kids' in the JSON file?
wbresult = BytesIO()
wb.save(wbresult)
return wbresult.getvalue()
def createRow(self, elm, sid):
""" For Excel """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = {'Seite':page, 'ID':sid, 'Text':text, 'Pfad':xpath, 'Fett':bold, 'Kursiv':italic, 'Unterstrichen':underline,\
'Versal':versal, 'PT':fontsize, 'Alignment':alignment, 'BlockAlign':blockAlign, 'InlineAlign':inlineAlign,\
'Col':col, 'Row':row, 'Width':width, 'Height':height, 'LineHeight':lineHeight, 'SpaceAfter':spaceAfter,\
'Placement':placement, 'Bemerkung':note, 'Bounds':bounds, 'ClipBounds':clipBounds, 'BBox':bBox,\
'Font':font, 'HasClip':hasClip, 'Lang':lang}
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
#=======================================================================
# JSON to CSV (Alternative for JSON to Excel)
#=======================================================================
def json2csv(self, data):
""" JSON to CSV """
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;"\
"Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
jsondata = json.loads(data)
elements = jsondata['elements']
sid = 0
for elm in elements:
sid += 1
result += self.createRow(elm, sid)
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
sidstr = f"{sid}.{kid}"
result += self.createRowForCSV(kidelm, sidstr)
filename=json2excel.csv")
return result
def createRowForCSV(self, elm, sid):
""" """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = f"{page};'{sid}';'{text}';{xpath};{bold};{italic};{underline};{versal};{fontsize};{alignment};{blockAlign};{inlineAlign};{col};{row};\
{width};{height};'{lineHeight}';'{spaceAfter}';{placement};{note};{bounds};{clipBounds};{bBox};{font};{hasClip};{lang}\n"
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
INPUTPATH = "/home/abc/Clips/JSON/structuredData.json"
OUTPUTPATH = "/home/abc/Clips/JSON/result.xlsx"
if __name__ == '__main__':
converter = ExtractJSON()
f = open(INPUTPATH, "r"); data = f.read(); f.close()
print ("JSON converting..")
result = converter.convert(data)
g = open(OUTPUTPATH, "wb"); g.write(result); g.close()
print (f"Excel stored on {OUTPUTPATH}")
sys.exit(0)
The error I am getting is:
JSON converting..
Traceback (most recent call last):
File "/home/abc/Clips/JSON/json2excel.py", line 222, in <module>
result = converter.convert(data)
File "/home/abc/Clips/JSON/json2excel.py", line 23, in convert
jsondata = self.json2csv(data)
File "/home/abc/Clips/JSON/json2excel.py", line 146, in json2csv
result += self.createRow(elm, sid)
TypeError: can only concatenate str (not "dict") to str
Solution 1:[1]
Long story short, this is the minimal-reproducible-example of your code:
class ExtractJSON:
def createRowForCSV(self):
return {
'Seite':1, 'ID':2, 'Text':3, 'Pfad':4, 'Fett':5, 'Kursiv':6, 'Unterstrichen':7,
'Versal':9, 'PT':0, 'Alignment':10, 'BlockAlign':11, 'InlineAlign':12,
'Col':13, 'Row':14, 'Width':15, 'Height':16, 'LineHeight':17, 'SpaceAfter':18,
'Placement':19, 'Bemerkung':20, 'Bounds':21, 'ClipBounds':22, 'BBox':23,
'Font':2.4, 'HasClip':"twenty-five", 'Lang':"twenty-6"
}
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result += self.createRowForCSV()
return result
Output
Traceback (most recent call last):
File "Main.py", line 28, in <module>
print(ExtractJSON().json2csv())
File "Main.py", line 17, in json2csv
result += self.createRowForCSV()
TypeError: can only concatenate str (not "dict") to str
Now, we can clearly see that the reason of the error is that result is an str but yet you are appending to it the output of createRowForCSV which is a dict. To visualize this further, you are like doing:
"Some text" + {"another": "one"}
which obviously would fail.
Solution 1:
Manually iterate the dict and get the values in order.
class ExtractJSON:
...
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result_field_names = result.strip().split(';') # Get the order of the keys
result_dict = self.createRowForCSV() # Get the row in dict format
result_string = ";".join(map(str, [result_dict[key] for key in result_field_names])) # Get the values in order and combine them into a string with ';' as the separator
result += result_string + "\n" # Append to the result
return result
...
Solution 2:
Use csv.DictWriter to convert the response dictionary of createRowForCSV to a csv string.
import csv
from io import StringIO
class ExtractJSON:
...
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result_field_names = result.strip().split(';') # Get the order of the keys
result_dict = self.createRowForCSV() # Get the row in dict format
result_string = StringIO() # Create the string that will hold the result
csv.DictWriter(result_string, result_field_names, delimiter=';').writerow(result_dict) # Convert the dict to str
result_string.seek(0) # Since result_string is a file-like object, go back to the beginning
result += result_string.read() # Read the result string from the beginning to end and append to the result
return result
...
Output
>>> print(ExtractJSON().json2csv())
Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang
1;2;3;4;5;6;7;9;0;10;11;12;13;14;15;16;17;18;19;20;21;22;23;2.4;twenty-five;twenty-6
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |
