'Python appending a list to a dictionary. List is split into individual chars instead of the whole

I have a spreadsheet. It has Identifying numbers and then multiple codes associated to those numbers. For example;

ID Code
1 ABC1234
1 CBA1234
2 ABS1234
2 DEF3456

etc...

I am trying to iterate through the informatoion and create dictionaries with ID as the Key and a list of Codes as the value. My prblem is that the values are all being split into the individual characters. i.e; {1: ['A', 'B', 'C', '1', '2', '3', '4', 'C', 'B', 'A', '1', '2', '3', '4']}

The code I have is

from openpyxl import *

FilePath = "File.xlsx"

wb = load_workbook(filename=FilePath)


Sheet = wb.active
Sheet1 = wb['Sheet1']
Sheet2 = wb['Sheet2']
Dict1 = {}
UnitPlace = Sheet1.iter_cols(min_row=2, max_row=2, min_col=18, max_col=110)
Dict = {}
def add_values_in_dict(sample_dict, key, list_of_values):
    if key not in sample_dict:
        sample_dict[key] = list()
    sample_dict[key].extend(list_of_values)
    return sample_dict

def read():
    for SID2 in Sheet2['A']:
        UnitInSheet2 = Sheet2['D'+(str(SID2.row))].value
        Dict1 = add_values_in_dict(Dict, SID2.value, UnitInSheet2)
        print (Dict1)
        for SID1 in Sheet1['D']:
            Rows = SID1.row
read()


Solution 1:[1]

Update: Reading Excel file using Openpyxl

We can iterate the rows using Openpyxl. Then we store the ID-codes as a dictionary where ID is the key and Code is the list of values.

from openpyxl import load_workbook


def read_file_openpyxl(file_path):
    wb = load_workbook(filename=file_path)
    sheet_1 = wb['Sheet1']
    frequency = {}
    for row in sheet_1.values:
        if type(row[0]) == int:
            id = row[0]
            code = row[1]
            if id in frequency:
                frequency[id].append(code)
            else:
                frequency[id] = [code]
    return frequency


if __name__ == "__main__":
    FilePath = "dummy.xlsx"
    print(read_file_openpyxl(FilePath))

Output:

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

Alternative solution using Pandas:

Alternatively, you can use pandas.read_excel method to read the excel file.

import pandas as pd


def read_data(file_path):
    dataset = pd.read_excel(file_path)
    ids = list(dataset["ID"])
    codes = list(dataset["Code"])
    frequency = {}
    for i in range(len(ids)):
        id = ids[i]
        code = codes[i]
        if id in frequency:
            frequency[id].append(code)
        else:
            frequency[id] = [code]
    return frequency


if __name__ == "__main__":
    FilePath = "dummy.xlsx"
    freq = read_data(FilePath)
    print(freq)

Output:

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

Explanation:

  • First, we read the excel file using pandas.read_excel method.
  • Then we separated ids and codes with the rows of the excel sheet.
  • We used a dictionary named frequency to store the occurrences of code for each unique id.

References:

Solution 2:[2]

I favor list comprehension more, following is my solution

# Import openpyxl
# Note: openpyxl package provides both read and write capabilities to excel
import openpyxl
import os


# Class definitions should use CamelCase convention based on pep-8 guidelines
class CustomOpenpyxl:

    # Initialize the class with filename as only argument
    def __init__(self, _my_file_name):
        assert _my_file_name.split('.')[-1] == 'xlsx', 'Input file is not xlsx'
        self.my_filename = _my_file_name
        self.my_base_wb = openpyxl.load_workbook(self.my_filename, read_only=False)
        # following line will get the names of worksheets in the workbook
        self.ws_names_in_my_base_wb = self.my_base_wb.sheetnames
        # following line will set the last worksheet in the workbook as active
        self.my_base_active_ws = self.my_base_wb.active

    # Method to set a specific worksheet as active where column names are in row#1
    # Argument to this method is: - worksheet name
    def active_ws(self, _ws_name):
        # if the worksheet name exists in the workbook
        if self.if_ws_in_wb(_ws_name):
            self.my_base_active_ws = self.my_base_wb[_ws_name]
        else:
            print('Worksheet {} not found in workbook'.format(_ws_name))

    # Method to check if a given worksheet exists in workbook
    # Argument to this method is: workbook name
    def if_ws_in_wb(self, _ws_name):
        # if worksheet exists in list of worksheets of workbook returns True else False
        if _ws_name in self.ws_names_in_my_base_wb:
            return True
        return False

    # Create a dictionary from excel cell values
    # No arguments to this method
    def create_dict_from_values(self):
        # Create an empty dictionary
        _my_dict = dict()
        # get the unique keys (numeric) from the first column of the worksheet
        _my_keys = set([_row[0] for _row in self.my_base_active_ws.values if str(_row[0]).isdigit()])
        # Iterate over the keys and add the values as a list
        for _ in _my_keys:
            # using list comprehension add the values to the key or keys
            _my_dict[_] = [_row[1] for _row in self.my_base_active_ws.values if _row[0] == _]
        return _my_dict


_my_file_path = os.getcwd()
_my_file_name = 'Book1.xlsx'

# Instantiate an object using the newly created class in this code block
# So that this object gets access to all methods in the class
_my_src_obj = CustomOpenpyxl(_my_file_name)
print(_my_src_obj.create_dict_from_values())
####################################################################PRINT Result################################################################################

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

Process finished with exit code 0

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2