'Python convert an invalid json api response to dataframe
I am basically calling a splunk api to fetch the output in json, but by default the result is not in a proper json format.
This is the output which I get from the api
{"preview":false,"lastrow":true}\n',
'{"preview":true,"offset":0,"lastrow":true,"result”:{“user”:”abc1213”,”ip":"10.1.1.1","path”:”/home/search/“,”url_status”:”200”,”count":"7"}}\n{"preview":true,"offset":0,"lastrow":true,"result":{"user”:”abc1213”,”ip”:”10.1.1.1”,”path”:”/browse/contract/payment”,”url_status":"400","count":"7"}}\n{"preview":false,"offset":0,"lastrow":true,"result":{"user”:”abc”123,”ip”:”10.1.1.1”,”path”:”/home/data/contact“,”url_status”:”200”,”count":"7"}}\n'
Here the response type is str, I am not sure how to convert it and append the results in nice format, where I can have columns like user | ip | path | url_status
response = requests.request("POST", url, headers=headers, data=safe_payload, verify=False)
return response.text
I have tried adding the missing quotes to make the response like json, but still json.loads doesn't works.
Solution 1:[1]
This is the solution, if someone out there needs. The code will transform the splunk api invalid json to pandas dataframe into rows and columns.
import pandas as pd
import requests
import json
import urllib3
from base64 import b64encode
import urllib.parse
from csv import reader
urllib3.disable_warnings()
data=[]
output1 = pd.DataFrame()
def fetch_data_using_userid(userid):
url = "https://localhost:8000/services/search/jobs/export"
payload = {
'search': f'search earliest = -40h index=* user_id="{userid}" | stats count by user user_ip ',
'output_mode':'json'
}
safe_payload = urllib.parse.urlencode(payload)
userAndPass = b64encode(b"admin:pass").decode("ascii")
headers = {
'Authorization': 'Basic %s' % userAndPass,
'Content-Type': 'application/json; charset=UTF-8'
}
response = requests.request("POST", url, headers=headers, data=safe_payload, verify=False)
return response.text
# open file in read mode
with open('/Users/Downloads/user_data.csv', 'r') as read_obj:
# pass the file object to reader() to get the reader object
csv_reader = reader(read_obj)
header = next(csv_reader)
if header is not None:
# Iterate over each row in the csv using reader object
for row in csv_reader:
# row variable is a list that represents a row in csv
print(row)
response_text = fetch_data_using_userid(row[0])
data = response_text
with open('data.txt', 'w') as wf:
wf.write(data)
with open('data.txt') as rf:
for entry in rf:
LDict = json.loads(entry)
output1 = output1.append(LDict,ignore_index=True)
print(response_text)
# result1 = pd.json_normalize(output1['result'])
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |
