Django forms - Pandas displaying integers as column names instead of the actual names
I'm building a form to select columns from a CSV/XLSX file and then convert the selection to a dataframe. The parsing works and I do get a dataframe, but its column names are integers instead of the actual names. I can't figure out why.
My forms in forms.py, which let me upload the file and select the columns:
class CompareFormTransporteur(forms.ModelForm):
    """First-step upload form: parse the uploaded file and stash it in the session.

    The form receives the current request as its first positional argument
    and uses the request's session to hand the parsed file to the next step.
    """

    file = forms.FileField(label="Fichier (CSV, XLSX, XML) ", required=True)
    header_row = forms.IntegerField(label="Header row", required=True)

    class Meta:
        model = CheckFile
        fields = ['file',]

    def __init__(self, request, *args, **kwargs):
        """Initialise the form and remember the request.

        Args:
            request: Current HTTP request; its ``session`` and ``FILES`` are used.
            *args: Forwarded unchanged to ``forms.ModelForm``.
            **kwargs: Forwarded unchanged to ``forms.ModelForm``.
        """
        # Initialise the base form exactly once; a second call would only
        # rebuild the same state.
        super().__init__(*args, **kwargs)
        self.request = request
        # Raw (not yet validated) value of the header_row field.
        self.request.session['header_row'] = self['header_row'].value()

    def clean(self):
        """Parse the uploaded file and store the result in the session.

        On success the parsed content is stored under the ``uploaded`` session
        key. An unsupported file extension is reported as a validation error
        on the ``file`` field.

        Returns:
            The form's ``cleaned_data``.
        """
        super().clean()

        upload = self.request.FILES.get('file')
        header_row = self.cleaned_data.get('header_row')
        if upload is None or header_row is None:
            # Field-level validation already failed and recorded its own
            # errors; there is nothing sensible to parse.
            return self.cleaned_data

        # Compare case-insensitively so that e.g. "DATA.XLSX" is accepted.
        extension = os.path.splitext(upload.name)[1].lower()
        if extension in ('.xlsx', '.xls'):
            # Use the header row chosen by the user instead of a fixed value.
            # NOTE(review): assumes parse_excel's `rowheader` uses the same
            # numbering as the form field - confirm against parse_excel.
            uploaded = parse_excel(upload, rowheader=header_row)
        elif extension == '.csv':
            uploaded = parse_csv(upload)
        elif extension == '.xml':
            uploaded = parse_xml(upload)
        else:
            # Without this branch `uploaded` would be unbound below.
            self.add_error(
                'file',
                "Format de fichier non pris en charge : utilisez CSV, XLSX ou XML.",
            )
            return self.cleaned_data

        self.request.session['uploaded'] = uploaded
        # NOTE(review): 'profile' is not among the fields declared on this
        # form, so this presumably stores None - confirm.
        self.request.session['profile'] = self.cleaned_data.get('profile')
        return self.cleaned_data
class CompareFormPartTwo(forms.Form):
    """Second-step form: choose columns of the previously uploaded file.

    The available choices are generated per request from the ``uploaded``
    entry of the session.
    """

    columns = forms.MultipleChoiceField(
        label="Colonnes",
        widget=forms.CheckboxSelectMultiple(),
    )

    def __init__(self, request, *args, **kwargs):
        """Initialise the form and build the choices from the session data.

        Args:
            request: Current HTTP request; its session must hold ``uploaded``.
            *args: Forwarded unchanged to ``forms.Form``.
            **kwargs: Forwarded unchanged to ``forms.Form``.
        """
        super().__init__(*args, **kwargs)
        self.request = request
        self.uploaded = request.session['uploaded']
        # One (position, item) choice per item yielded by iterating the
        # uploaded data.
        self.fields['columns'].choices = list(enumerate(self.uploaded))

    def clean_columns(self):
        """Validate the submitted columns and convert the choices to int."""
        return list(map(int, self.cleaned_data['columns']))

    def clean(self):
        """Store the cleaned column selection in the session."""
        super().clean()
        session = self.request.session
        session['selection'] = self.cleaned_data.get('columns')
        print(session['selection'])
My code in views.py:
class FormCompareTransporteur(RequestFormMixin, CreateView):
    """Implement the first step: uploading the file."""

    form_class = CompareFormTransporteur
    template_name = 'tool/upload-fichier-transporteur.html'
    success_url = reverse_lazy('tool:upload-columns-selection')
class FormColumnSelection(RequestFormMixin, FormView):
    """Implement the second step: the dynamic multiple choice of columns."""

    form_class = CompareFormPartTwo
    template_name = 'tool/upload-columns-selection.html'
    success_url = reverse_lazy('tool:result')
class ResultView(TemplateView):
    """Render the result page from the data kept in the session."""

    template_name = 'tool/result.html'

    def get_context_data(self, **kwargs):
        """Copy the session entries into the context and build the selection frame.

        Returns:
            The template context, including the ``uploaded``, ``selection``
            and ``profile`` entries read from the session.
        """
        context = super().get_context_data(**kwargs)
        session = self.request.session
        for key in ('uploaded', 'selection', 'profile'):
            context[key] = session[key]

        # Number the (name, values) pairs of the upload by position, then keep
        # only the positions that were selected.
        numbered = dict(enumerate(context['uploaded'].items()))
        picked = {pos: numbered[pos] for pos in context['selection']}

        # Name of each selected column, keyed by its position in the upload.
        d_col = {pos: pair[0] for pos, pair in picked.items()}

        df = pd.DataFrame()
        for row_label, pair in enumerate(picked.values()):
            df = df.append(pd.DataFrame(data=pair[1], index=[row_label]))

        # After transposing, the column labels are the enumerate counters used
        # above, whereas d_col is keyed by positions in the full upload.
        df = df.T.rename(columns=d_col)
        print(df)
        return context
Here's an example of what I get as dataframe:
0 1 2
0 FR 0076029075 NaN
1 FR 0076036929 NaN
2 FR 0076044278 NaN
3 FR 0076044551 NaN
4 FR 0076047452 NaN
Could you please help me to get the names instead of those integers?
Solution 1:[1]
OK, I found the solution. The problem was caused by the keys of the dictionary d_col. The dictionary I got was this one:
{7: 'Value 1', 8: 'Value 2', 9: 'Value 3'}
The dictionary keys did not match the integer column labels of the dataframe shown above:
0 1 2
0 FR 0076029075 NaN
1 FR 0076036929 NaN
2 FR 0076044278 NaN
3 FR 0076044551 NaN
4 FR 0076047452 NaN
The solution is to renumber the dictionary keys starting from 0, so that they match the column labels. This is done by adding:
# Re-key the column names 0, 1, 2, ... in insertion order, replacing d_col's original keys.
d_col2 = {i: v for i, v in enumerate(d_col.values())}
Such as:
def get_context_data(self, **kwargs):
    """Copy the session entries into the context and build the selection frame.

    The selected columns are assembled into a DataFrame whose columns are
    renamed from their positional labels to the actual column names.

    Returns:
        The template context, including the ``uploaded``, ``selection`` and
        ``profile`` entries read from the session.
    """
    context = super().get_context_data(**kwargs)
    context['uploaded'] = self.request.session['uploaded']
    context['selection'] = self.request.session['selection']
    context['profile'] = self.request.session['profile']

    # Number the (name, values) pairs of the upload by position, then keep
    # only the positions that were selected.
    dict_comparaison_p1 = dict(enumerate(context['uploaded'].items()))
    list_comparaison_p2 = context['selection']
    dict_comparaison_p2 = {number: dict_comparaison_p1[number] for number in list_comparaison_p2}

    d_col = {}
    frames = []
    for idx, (k, v) in enumerate(dict_comparaison_p2.items()):
        d_col[k] = v[0]
        frames.append(pd.DataFrame(data=v[1], index=[idx]))

    # DataFrame.append was removed in pandas 2.0; concatenate the one-row
    # frames in a single call instead. pd.concat rejects an empty list, so an
    # empty selection falls back to an empty frame.
    df = pd.concat(frames) if frames else pd.DataFrame()

    # Re-key the names 0..n-1 so they match the transposed frame's column
    # labels, which come from the enumerate counter used as index above.
    d_col2 = {i: v for i, v in enumerate(d_col.values())}
    df = df.T.rename(columns=d_col2)
    # NOTE(review): df is built but neither returned nor added to the
    # context - confirm how the template is expected to receive it.
    return context
It now works and I get the column names.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Vincent |
