'ETL by parsing JSON dynamically, Python

I new to python. I want read auth column from PostgreSQL which gives a json. I need to parse it and get the relevant api credentials in it. Then based on these, I want to get the data which is again json but this time its deeply nested json and objects can be more or less in different json. Now, from these JSON, I want to get all the keys and insert these in Source column names in the source table as rows of sourceColumnNames column. Target Column may have less columns then source lets say only a and d from source as name and PostalCode.

I am wondering how can I achieve this. It looks to be done something like scala case classes, target and source model classes but its needed to be done in python. How?

Data in AuthColumn is

{   "url": "https://api.myUrl.com/v2",
    "headers": {
        "Authorization": "TheSecretAccessToken2022",
        "Content-Type": "application/json"
    },
    "data": {
        "query": "{ boards{ items{ name column_values {a b c d} } } }"
    } }

I need to parse it to get credentials and execute the query. Then it will return some JSON which I need to parse. This JSON could be like this

{
    "data": {
        "boards": [{
            "name": "DP",
            "id": "123",
            "description": null,
            "items": [{
                "name": "TheColumn",
                "column_values": [{
                    "a": "PDs",
                    "b": "PDs",
                    "c": "CI",
                    "d": "PV"
                }, {
                    "a": "SLUD",
                    "b": "SLUD",
                    "c": "d",
                    "d": "MFO"
                }, {
                    "a": "ST",
                    "b": "ST",
                    "c": "CI",
                    "d": "UC"
                }, {
                    "a": "c",
                    "b": "c",
                    "c": "CI",
                    "d": "NC"
                }, {
                    "a": "OP",
                    "b": "op",
                    "c": "CI",
                    "d": "0 days"
                }, {
                    "a": "OPd",
                    "b": "OPd",
                    "c": "CI",
                    "d": "2022-02-25"
                }, {
                    "a": "CD",
                    "b": "cd",
                    "c": "d",
                    "d": "2022-02-25"
                }, {
                    "a": "cld",
                    "b": "cld",
                    "c": "d",
                    "d": "2022-04-22"
                }, {
                    "a": "SoDce",
                    "b": "soDce",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "MOD",
                    "b": "MOD",
                    "c": "date",
                    "d": ""
                }, {
                    "a": "PP",
                    "b": "PP",
                    "c": "nuUDic",
                    "d": "625000"
                }, {
                    "a": "UD",
                    "b": "UD",
                    "c": "nuUDic",
                    "d": ""
                }, {
                    "a": "PAVSP",
                    "b": "PAVSP",
                    "c": "neUDic",
                    "d": ""
                }, {
                    "a": "LendeUD",
                    "b": "lendeUD",
                    "c": "CI",
                    "d": "TBD"
                }, {
                    "a": "ESP",
                    "b": "ESP",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "ac",
                    "b": "ac",
                    "c": "CI",
                    "d": "Chicago"
                }, {
                    "a": "SLd",
                    "b": "SLd",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "UA",
                    "b": "UA",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "UD",
                    "b": "UD",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "R?",
                    "b": "R",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "DDE",
                    "b": "DDE",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "SOD",
                    "b": "SOD",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "NOS",
                    "b": "NOS",
                    "c": "d",
                    "d": ""
                }]
            }, {
                "name": "BBB",
                "column_values": [{
                    "a": "PeUDs",
                    "b": "PeUDs",
                    "c": "CI",
                    "d": "PV"
                }, {
                    "a": "SLUD",
                    "b": "SLUD",
                    "c": "d",
                    "d": "Ddd"
                }, {
                    "a": "ST",
                    "b": "ST",
                    "c": "CI",
                    "d": "UC"
                }, {
                    "a": "c",
                    "b": "c",
                    "c": "CI",
                    "d": "NC"
                }, {
                    "a": "OP",
                    "b": "op",
                    "c": "CI",
                    "d": "0 days"
                }, {
                    "a": "OPd",
                    "b": "OPd",
                    "c": "CI",
                    "d": "2022-02-23"
                }, {
                    "a": "CD",
                    "b": "cd",
                    "c": "d",
                    "d": "2022-02-23"
                }, {
                    "a": "cld",
                    "b": "cld",
                    "c": "d",
                    "d": "2022-03-04"
                }, {
                    "a": "SoDce",
                    "b": "soDce",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "MOD",
                    "b": "MOD",
                    "c": "date",
                    "d": ""
                }, {
                    "a": "PP",
                    "b": "PP",
                    "c": "nuUDic",
                    "d": "3200"
                }, {
                    "a": "UD",
                    "b": "UD",
                    "c": "numeic",
                    "d": ""
                }, {
                    "a": "PDVSP",
                    "b": "PDVSP",
                    "c": "nueUDic",
                    "d": ""
                }, {
                    "a": "ESP",
                    "b": "ESP",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "ac",
                    "b": "ac",
                    "c": "CI",
                    "d": "Chicago a"
                }, {
                    "a": "SLd",
                    "b": "SLd",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "UA",
                    "b": "UA",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "UD",
                    "b": "UD",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "R?",
                    "b": "R",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "DDE",
                    "b": "DDE",
                    "c": "CI",
                    "d": "DooU"
                }, {
                    "a": "SOD",
                    "b": "SOD",
                    "c": "CI",
                    "d": ""
                },  {
                    "a": "IU",
                    "b": "IU",
                    "c": "CI",
                    "d": ""
                },{ "a": "DD",
                    "b": "DD",
                    "c": "CI",
                    "d": ""
                }, {
                    "a": "LOS",
                    "b": "LOS",
                    "c": "num",
                    "d": ""
                }, {
                    "a": "NOS",
                    "b": "NOS",
                    "c": "d",
                    "d": ""
                }]          }]      }]  }}

Now, I want to parse this Json and get keys and insert then to columnNames column in Meta Data Table as

sourceColumnNames 
name
id
description
items_name
a
b
c
d

Then I will query auth, get creds, and get values based on these source columns.

So far, I have parsed JSON by json in python using index.

import json


with open('path/file.json') as myJson:
    read_myjson = json.load(myJson)

read_data = read_myjson['data']
read_board = read_myjson['data']['boards']
board_name = read_myjson['data']['boards'][0]['name']
board_id = read_myjson['data']['boards'][0]['id']
board_description = read_myjson['data']['boards'][0]['description']
board_items = read_myjson['data']['boards'][0]['items']
board_items_name = read_myjson['data']['boards'][0]['items'][0]['name']
board_items_columnValues = read_myjson['data']['boards'][0]['items'][0]['column_values']
board_items_columnValues_title = read_myjson['data']['boards'][0]['items'][0]['column_values'][0]['a']
board_items_columnValues_id = read_myjson['data']['boards'][0]['items'][0]['column_values'][0]['b']
board_items_columnValues_type = read_myjson['data']['boards'][0]['items'][0]['column_values'][0]['c']
board_items_columnValues_text = read_myjson['data']['boards'][0]['items'][0]['column_values'][0]['d']
    
# for loop on Header
print("printing Header loop  : ")
for key, val in read_myjson.items():
    print(key, ":::", val)
    headerKey = key
    headerValue = val        
    print("printing data loop  : it gives board key and its value")
for key, val in read_data.items():
    # print(key, ":::", val)
    datakey = key
    dataValue = val
    # print(datakey, "::::", dataValue)    
print(" items loop")
# for key, val in read_board.items():
for item in board_items:
    for key, val in item.items():
        # print(key, ":::", val)
        compDataAsKey = key
        compDataAsValue = val    
print(" Items_column_values loop")    
columnKeys = []
columnValues = []
for items in board_items_columnValues:
    for key, val in items.items():
        # print(key, ":", val)
        # compColumnKey = key
        # compColumnValue = val
        columnKeys.append(key)
        columnValues.append(val)

I have also tried dataclasses in python but cant actually map the class to json parse etc.

import json
import orjson, dataclasses

with open('path/AuthJsonSample.json') as myJson:
       read_myjson = json.load(myJson)

@dataclasses.dataclass
class AuthData:
       url: str
       headers: str
       data: str

How can I make this etl pipeline?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source