Django forms - Pandas displaying integers as column names instead of the actual names

I'm building a form to select columns from a CSV/XLSX file and then convert the selection to a dataframe. The parsing works and I do get a dataframe, but its column names are integers rather than the actual column names. I can't figure out why.

My forms in forms.py, which let me upload the file and select the columns:


class CompareFormTransporteur(forms.ModelForm):
    """First-step form: receive an uploaded file and parse it into the session.

    The form takes the current ``request`` as its first positional argument.
    During validation the uploaded file is parsed according to its extension
    and the parsed result is stored in ``request.session['uploaded']``.
    """

    file = forms.FileField(label="Fichier (CSV, XLSX, XML) ", required=True)
    header_row = forms.IntegerField(label="Header row", required=True)

    class Meta:
        model = CheckFile
        fields = ['file',]

    def __init__(self, request, *args, **kwargs):
        """Bind the form and remember the submitted header row in the session.

        Args:
            request: The current HTTP request; its session and FILES are used.
            *args: Forwarded to ``forms.ModelForm``.
            **kwargs: Forwarded to ``forms.ModelForm``.
        """
        # The parent initialiser only needs to run once; a second call would
        # rebuild the form state for no benefit.
        super().__init__(*args, **kwargs)
        self.request = request
        self.request.session['header_row'] = self['header_row'].value()

    def clean(self):
        """Parse the uploaded file and store the result in the session.

        Returns:
            The form's ``cleaned_data``.

        Raises:
            forms.ValidationError: If the file extension is not one of
                ``.xlsx``, ``.xls``, ``.csv`` or ``.xml``.
        """
        super().clean()

        upload = self.request.FILES.get('file')
        if upload is None:
            # Nothing to parse; the required ``file`` field is expected to
            # report the missing upload itself.
            return self.cleaned_data

        # Compare case-insensitively so that e.g. "DATA.XLSX" is accepted.
        extension = os.path.splitext(upload.name)[1].lower()
        if extension in ('.xlsx', '.xls'):
            # NOTE(review): the header row is hard-coded to 2 although the
            # form collects ``header_row`` -- confirm what ``parse_excel``
            # expects before wiring the user's value in.
            uploaded = parse_excel(upload, rowheader=2)
        elif extension == ".csv":
            uploaded = parse_csv(upload)
        elif extension == ".xml":
            uploaded = parse_xml(upload)
        else:
            # Previously an unknown extension left ``uploaded`` unbound and
            # crashed with UnboundLocalError; report it as a form error.
            raise forms.ValidationError(
                "Format de fichier non pris en charge : %(extension)s",
                code='unsupported_extension',
                params={'extension': extension or '(aucune)'},
            )

        self.request.session['uploaded'] = uploaded
        # NOTE(review): no ``profile`` field is declared on this form, so
        # this stores None unless a subclass or mixin adds one -- confirm.
        self.request.session['profile'] = self.cleaned_data.get('profile')
        return self.cleaned_data

class CompareFormPartTwo(forms.Form):
    """Second-step form: choose columns from the data parsed in step one.

    The choices are built at construction time from
    ``request.session['uploaded']``; the validated selection is written back
    to ``request.session['selection']``.
    """

    columns = forms.MultipleChoiceField(label="Colonnes", widget=forms.CheckboxSelectMultiple())

    def __init__(self, request, *args, **kwargs):
        """Bind the form and populate the column choices from the session."""
        super().__init__(*args, **kwargs)
        self.request = request
        self.uploaded = request.session['uploaded']
        # Each choice is an (index, item) pair produced by enumerating the
        # uploaded data.
        self.fields['columns'].choices = list(enumerate(self.uploaded))

    def clean_columns(self):
        """Validate the column data and convert the chosen values to int."""
        chosen = self.cleaned_data['columns']
        return list(map(int, chosen))

    def clean(self):
        """Store the cleaned column selection in the session and print it."""
        super().clean()
        selection = self.cleaned_data.get('columns')
        self.request.session['selection'] = selection
        print(self.request.session['selection'])

My code in views.py:

class FormCompareTransporteur(RequestFormMixin, CreateView):
    """Implement the first step: uploading the file."""

    # Template rendered for the upload step.
    template_name = 'tool/upload-fichier-transporteur.html'
    # Form class used by this view; its __init__ takes the request as first
    # argument (presumably supplied by RequestFormMixin -- confirm).
    form_class = CompareFormTransporteur
    # URL name of the column-selection step, resolved lazily.
    success_url = reverse_lazy('tool:upload-columns-selection')

class FormColumnSelection(RequestFormMixin, FormView):
    """Implement the second step: the dynamic multiple choice of columns."""

    # Template rendered for the column-selection step.
    template_name = 'tool/upload-columns-selection.html'
    # Form class used by this view; its __init__ takes the request as first
    # argument (presumably supplied by RequestFormMixin -- confirm).
    form_class = CompareFormPartTwo
    # URL name of the result page, resolved lazily.
    success_url = reverse_lazy('tool:result')

class ResultView(TemplateView):
    """Render the result page from the data stored in the session."""

    template_name = 'tool/result.html'

    def get_context_data(self, **kwargs):
        """Expose the session data in the context and print the selected columns.

        Copies ``uploaded``, ``selection`` and ``profile`` from the session
        into the context, builds a dataframe from the selected columns and
        prints it. The dataframe itself is not added to the context.
        """
        context = super().get_context_data(**kwargs)
        context['uploaded'] = self.request.session['uploaded']
        context['selection'] = self.request.session['selection']
        context['profile'] = self.request.session['profile']
        # Map each position in the uploaded mapping to its (key, value) pair.
        dict_comparaison_p1 = dict(enumerate(context['uploaded'].items()))
        list_comparaison_p2 = context['selection']
        # Keep only the selected positions.
        dict_comparaison_p2 = {number: dict_comparaison_p1[number] for number in list_comparaison_p2}
        d_col = {}
        df = pd.DataFrame()
        for idx, (k, v) in enumerate(dict_comparaison_p2.items()):
            # d_col is keyed by the original position k of the column ...
            d_col[k] = v[0]
            # ... while each row appended here is labelled idx (0, 1, 2, ...).
            df = df.append(pd.DataFrame(data=v[1], index=[idx]))
        # After the transpose the column labels are the idx values, so this
        # rename only takes effect where a key k of d_col equals one of them.
        df = df.T.rename(columns = d_col)
        print(df)

        return context

Here's an example of the dataframe I get:

        0           1    2
0      FR  0076029075  NaN
1      FR  0076036929  NaN
2      FR  0076044278  NaN
3      FR  0076044551  NaN
4      FR  0076047452  NaN

Could you please help me to get the names instead of those integers?



Solution 1:[1]

OK, I found the solution. The problem came from the keys of the dictionary d_col. The dictionary I got was this one:

{7: 'Value 1', 8: 'Value 2', 9: 'Value 3'}

The dictionary keys (7, 8, 9) did not match the integer column labels (0, 1, 2) of the dataframe shown above, so rename() had nothing to replace:

        0           1    2
0      FR  0076029075  NaN
1      FR  0076036929  NaN
2      FR  0076044278  NaN
3      FR  0076044551  NaN
4      FR  0076047452  NaN

The solution is to renumber the dictionary keys from 0 so that they match the dataframe's column labels, by adding:

d_col2 = dict(enumerate(d_col.values()))

Such as:

    def get_context_data(self, **kwargs):
        """Expose the session data in the context and build the selected columns.

        Copies ``uploaded``, ``selection`` and ``profile`` from the session
        into the context, then assembles the selected columns into a
        dataframe whose columns carry the original header names.

        Args:
            **kwargs: Forwarded to the parent ``get_context_data``.

        Returns:
            The template context.
        """
        context = super().get_context_data(**kwargs)
        context['uploaded'] = self.request.session['uploaded']
        context['selection'] = self.request.session['selection']
        context['profile'] = self.request.session['profile']
        # Map each position in the uploaded mapping to its (key, value) pair.
        dict_comparaison_p1 = dict(enumerate(context['uploaded'].items()))
        list_comparaison_p2 = context['selection']
        # Keep only the selected positions.
        dict_comparaison_p2 = {number: dict_comparaison_p1[number] for number in list_comparaison_p2}
        d_col = {}
        frames = []
        for idx, (k, v) in enumerate(dict_comparaison_p2.items()):
            d_col[k] = v[0]
            frames.append(pd.DataFrame(data=v[1], index=[idx]))

        # DataFrame.append no longer exists in pandas 2.x; concatenate the
        # collected frames once instead. pd.concat rejects an empty list, so
        # fall back to an empty frame when nothing was selected.
        df = pd.concat(frames) if frames else pd.DataFrame()

        # Renumber the keys from 0 so they match the column labels of df.T
        # (the idx values used as row labels above).
        d_col2 = {i: v for i, v in enumerate(d_col.values())}
        # NOTE(review): df is built here but not added to the context.
        df = df.T.rename(columns=d_col2)

        return context

It now works and I get the column names.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Vincent