Combine duplicate tokens inside huge JSON file into nested array of objects using React

I looked at several of the suggested solutions but none seemed to rise to this confounding data formatting challenge.

I have a huge JSON file (over 100k rows) with massive duplication: every record is a flat, top-level object that repeats the same manufacturer and device values. Here's an example:

[
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326B",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2022-01-01",
      "fw_id":"A326BXXS4AVA1",
      "android":"R(Android 11)",
      "known_passcode":false,
      "afu":false,
      "bfu":false,
      "bruteforce":false
   },
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326U",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2021-03-01",
      "fw_id":"A326USQU1AUD4",
      "android":"R(Android 11)",
      "known_passcode":true,
      "afu":false,
      "bfu":true,
      "bruteforce":true
   },
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326U1",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2021-09-01",
      "fw_id":"A326U1UEU5AUJ2",
      "android":"R(Android 11)",
      "known_passcode":true,
      "afu":false,
      "bfu":true,
      "bruteforce":true
   },
   {
      "manufacturer":"LGE",
      "device":"LG K31",
      "model":"LGL355DL",
      "chipset":"Mediatek MT6762",
      "date":"unknown",
      "fw_id":"L355DL10l",
      "android":"unknown",
      "known_passcode":false,
      "afu":false,
      "bfu":false,
      "bruteforce":false
   }
]

This needs to be organized so that data points like manufacturer, device, model are not duplicated hundreds of times.

Btw, here's a JSFiddle to play with: https://jsfiddle.net/xpancom/Lq7duahv/

Ideally, the JSON format would be the following:

[
  {
    "manufacturers": [
      {
        "manufacturer": "Samsung",
        "devices": [
          {
            "device": "Galaxy A32 5G",
            "models": [
              {
                "model": "SM-A326B",
                "data": [
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2022-01-01",
                    "fw_id": "A326BXXS4AVA1",
                    "android": "R(Android 11)",
                    "known_passcode": false,
                    "afu": false,
                    "bfu": false,
                    "bruteforce": false
                  },
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2021-09-01",
                    "fw_id": "A326BXXU3AUH7",
                    "android": "R(Android 11)",
                    "known_passcode": true,
                    "afu": false,
                    "bfu": true,
                    "bruteforce": true
                  }
                ]
              },
              {
                "model": "SM-A326U1",
                "data": [
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2021-09-01",
                    "fw_id": "A326U1UEU5AUJ2",
                    "android": "R(Android 11)",
                    "known_passcode": true,
                    "afu": false,
                    "bfu": true,
                    "bruteforce": true
                  }
                ]
              }
            ]
          }
        ]
      },
      {
        "manufacturer": "LGE",
        "devices": [
          {
            "device": "LG K31",
            "models": [
              {
                "model": "LGL355DL",
                "data": [
                  {
                    "chipset": "Mediatek MT6762",
                    "date": "unknown",
                    "fw_id": "L355DL10l",
                    "android": "unknown",
                    "known_passcode": false,
                    "afu": false,
                    "bfu": false,
                    "bruteforce": false
                  }
                ]
              }
            ]
          }
        ]
      }
    ]
  }
]

Working in React, here's what I've got so far in trying to massage this data:

  // Flat input records; the grouping helpers below read each record's 'device' key.
  const source = data;
  // Accumulates the grouped result objects that are rendered at the end.
  const destination = [];
  const classifiedTokens = []; // device names that have already been grouped
  const classifiedTokensModel = []; // model names that have already been grouped

  /**
   * Finds the first device name in `source` that has not been handled yet,
   * records it in `classifiedTokens`, and collects every record carrying
   * that device name.
   *
   * @param {Array<Object>} source - Flat records, each read via its 'device' key.
   * @returns {Array<Object>|null} The records for the newly claimed device, or
   *   null once no unclaimed device remains.
   */
  const getNextTokenArray = (source) => {
    let currentDevice = null;

    const matches = source.filter((entry) => {
      const device = entry['device'];

      // Claim the first device that has not been seen on an earlier call.
      if (!currentDevice && !classifiedTokens.includes(device)) {
        currentDevice = device;
        classifiedTokens.push(currentDevice);
      }

      if (!currentDevice) {
        return false;
      }
      return currentDevice === device;
    });

    if (currentDevice) {
      return matches;
    }
    return null;
  };

  /**
   * Splits one device's rows into per-model groups and appends one
   * `{ device, model, data }` entry per model to `destination`.
   *
   * The previous version stopped after the first unclassified model, so every
   * other model belonging to the same device never reached `destination`.
   * All models of the device are now grouped in a single pass.
   *
   * @param {{device: string, data: Array<Object>}} tokenObject - The device
   *   name plus its rows, as built by addToDestination.
   * @returns {null|undefined} null when no new model was found in the rows,
   *   otherwise undefined (results are delivered through `destination`).
   */
  const getNextTokenArrayModel = (tokenObject) => {
    const tokenObjectDevice = tokenObject['device'];
    const tokenObjectData = tokenObject['data'];

    // Map keeps the groups in first-seen order, matching the input order.
    const rowsByModel = new Map();

    tokenObjectData.forEach((element) => {
      const model = element['model'];
      const existingGroup = rowsByModel.get(model);
      if (existingGroup) {
        existingGroup.push(element);
        return;
      }
      // A model name already claimed on an earlier call is skipped, as before.
      if (classifiedTokensModel.includes(model)) {
        return;
      }
      classifiedTokensModel.push(model);
      rowsByModel.set(model, [element]);
    });

    if (rowsByModel.size === 0) {
      return null;
    }

    rowsByModel.forEach((modelRows, model) => {
      destination.push({
        device: tokenObjectDevice,
        model: model,
        data: modelRows.map((element) => ({
          manufacturer: element.manufacturer,
          chipset: element.chipset,
          date: element.date,
          fw_id: element.fw_id,
          android: element.android,
          knownPasscode: element.knownPasscode,
          afu: element.afu,
          bfu: element.bfu,
          bruteforce: element.bruteforce,
        })),
      });
    });
  };

  /**
   * Builds a `{ device, data }` object from the records of one device and
   * hands it to getNextTokenArrayModel for grouping by model.
   *
   * @param {Array<Object>} tokenArray - Records that all share one device
   *   name; an empty array is ignored.
   * @returns {undefined}
   */
  const addToDestination = (tokenArray) => {
    if (tokenArray.length === 0) return;

    const res = {
      device: tokenArray[0]['device'],
      data: tokenArray.map((element) => ({
        manufacturer: element.manufacturer,
        model: element.model,
        chipset: element.chipset,
        date: element.date,
        fw_id: element.fw_id,
        android: element.android,
        // The input key is snake_case; reading `knownPasscode` always gave
        // undefined, which JSON.stringify then dropped from the output.
        knownPasscode: element.known_passcode,
        afu: element.afu,
        bfu: element.bfu,
        bruteforce: element.bruteforce,
      })),
    };

    // Group this device's rows by model; results land in `destination`.
    getNextTokenArrayModel(res);
  };

  // Pull one device group at a time until getNextTokenArray reports (via null)
  // that every device has been handled.
  let nextTokenArray = getNextTokenArray(source);

  while (nextTokenArray) {
    addToDestination(nextTokenArray);
    nextTokenArray = getNextTokenArray(source);
  }

  // NOTE(review): the grouping above is synchronous, so the 1s delay looks
  // unnecessary; it is kept to preserve the original timing.
  setTimeout(() => {
    // Render through textContent so that '<' or '&' inside the data is shown
    // literally instead of being parsed as markup.
    const pre = document.createElement('pre');
    pre.textContent = JSON.stringify(destination, null, 2);

    const root = document.getElementById('root');
    root.textContent = '';
    root.appendChild(pre);
  }, 1000);

};


And here's the JSFiddle again: https://jsfiddle.net/xpancom/Lq7duahv/

Who can smash this data formatting dilemma?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source