could not convert string to float: 'cafe'
I have started developing a web application using Django. The application predicts sales, and for that I use linear regression. Some of my variables are strings, so in order to train the model I convert all the string variables to int using the method handle_non_numerical_data(). The user of the application enters the string fields as strings.
method
def handle_non_numerical_data(df, mappings=None):
    """Replace every non-numeric column of ``df`` with integer category codes.

    Each distinct value of a non-numeric column receives a code
    ``0, 1, 2, ...`` in order of first appearance, so the same data always
    yields the same encoding. (Numbering the elements of a ``set`` does not
    guarantee that: string hashing is randomized per interpreter run.)

    Args:
        df: pandas DataFrame to encode. It is modified in place.
        mappings: optional dict of the form ``{column: {value: code}}``.
            Columns already present in it are encoded with the stored codes;
            columns missing from it are encoded from scratch and their codes
            are recorded in it. Pass the same dict (or a persisted copy of
            it) for the training data and, later, for prediction inputs so
            both are encoded identically.

    Returns:
        The same ``df`` object, with non-numeric columns replaced by ints.

    Raises:
        ValueError: if a column is encoded with a stored mapping and holds a
            value that mapping has no code for.
    """
    # Function-scope import keeps the helper usable on its own.
    from pandas.api.types import is_numeric_dtype

    for column in df.columns.values:
        # Every numeric dtype is left untouched, not only int64/float64
        # (e.g. int32 or float32 columns must not be re-encoded).
        if is_numeric_dtype(df[column].dtype):
            continue

        column_contents = df[column].values.tolist()

        if mappings is not None and column in mappings:
            # Reuse the codes produced by an earlier call.
            text_digit_vals = mappings[column]
            unknown = [val for val in column_contents if val not in text_digit_vals]
            if unknown:
                raise ValueError(
                    "column {!r} contains values with no known code: {!r}".format(
                        column, unknown
                    )
                )
        else:
            # Number the distinct values in order of first appearance.
            text_digit_vals = {}
            for val in column_contents:
                if val not in text_digit_vals:
                    text_digit_vals[val] = len(text_digit_vals)
            if mappings is not None:
                mappings[column] = text_digit_vals

        df[column] = [text_digit_vals[val] for val in column_contents]
    return df
my model
# Libraries
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn import model_selection
#from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
#from sklearn.linear_model import Ridge
from sklearn.externals import joblib
# Load the dataset and replace every missing value with 0.
data = pd.read_csv('ml_code/ml_process/test.csv').fillna(0)
def handle_non_numerical_data(df, mappings=None):
    """Replace every non-numeric column of ``df`` with integer category codes.

    Each distinct value of a non-numeric column receives a code
    ``0, 1, 2, ...`` in order of first appearance, so the same data always
    yields the same encoding. (Numbering the elements of a ``set`` does not
    guarantee that: string hashing is randomized per interpreter run.)

    Args:
        df: pandas DataFrame to encode. It is modified in place.
        mappings: optional dict of the form ``{column: {value: code}}``.
            Columns already present in it are encoded with the stored codes;
            columns missing from it are encoded from scratch and their codes
            are recorded in it. Pass the same dict (or a persisted copy of
            it) for the training data and, later, for prediction inputs so
            both are encoded identically.

    Returns:
        The same ``df`` object, with non-numeric columns replaced by ints.

    Raises:
        ValueError: if a column is encoded with a stored mapping and holds a
            value that mapping has no code for.
    """
    # Function-scope import keeps the helper usable on its own.
    from pandas.api.types import is_numeric_dtype

    for column in df.columns.values:
        # Every numeric dtype is left untouched, not only int64/float64
        # (e.g. int32 or float32 columns must not be re-encoded).
        if is_numeric_dtype(df[column].dtype):
            continue

        column_contents = df[column].values.tolist()

        if mappings is not None and column in mappings:
            # Reuse the codes produced by an earlier call.
            text_digit_vals = mappings[column]
            unknown = [val for val in column_contents if val not in text_digit_vals]
            if unknown:
                raise ValueError(
                    "column {!r} contains values with no known code: {!r}".format(
                        column, unknown
                    )
                )
        else:
            # Number the distinct values in order of first appearance.
            text_digit_vals = {}
            for val in column_contents:
                if val not in text_digit_vals:
                    text_digit_vals[val] = len(text_digit_vals)
            if mappings is not None:
                mappings[column] = text_digit_vals

        df[column] = [text_digit_vals[val] for val in column_contents]
    return df
# Encode the non-numeric columns as integers so the whole frame is numeric.
# NOTE: the value-to-code encoding is not saved with the model, so inputs
# given to the model later must already be encoded the same way.
data = handle_non_numerical_data(data)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same NumPy array on every pandas version.
data = data.values
# X: matrix of explanatory variables (first nine columns)
X = data[:, 0:9]
# y: vector of the variable to predict (tenth column)
y = data[:, 9]
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.3, random_state=0)
lreg = LinearRegression()
lreg.fit(X2_train, y2_train)
# For a regressor, score() is the R^2 coefficient of determination,
# not a classification accuracy.
print('Accuracy of linear regression on training set: {:.2f}'.format(lreg.score(X2_train, y2_train)))
print('Accuracy of linear regression on test set: {:.2f}'.format(lreg.score(X2_test, y2_test)))
# Serialize the fitted linear regression model to bytes.
linear_regression_model = pickle.dumps(lreg)
# Store those bytes with joblib; reading the model back therefore takes
# joblib.load followed by pickle.loads.
joblib.dump(linear_regression_model, 'ml_code/linear_regression_model.pkl')
Code used for prediction:
import pickle
from sklearn.externals import joblib
# The model file holds pickle bytes that were stored with joblib, so loading
# takes two steps: joblib.load returns the bytes, pickle.loads rebuilds the model.
# NOTE(review): pickle.loads can execute arbitrary code; only load model files
# that come from a trusted source.
linear_regression_model = joblib.load('ml_code/linear_regression_model.pkl')
lreg = pickle.loads(linear_regression_model)
def get_prediction(magasin, numero_article, designation_article, moyen_de_ventes_par_jour, vente_2013,
                   vente_2014, ventes_2015, ventes_2016, ventes_2017
                   ):
    """Return the model's prediction for a single observation.

    The nine arguments are placed, in signature order, into one feature row
    that is handed to the module-level ``lreg`` model as a one-row batch.
    The values are forwarded unchanged.
    NOTE(review): presumably they must already be numeric, encoded the same
    way as the training data -- confirm against the caller.
    """
    feature_row = [
        magasin,
        numero_article,
        designation_article,
        moyen_de_ventes_par_jour,
        vente_2013,
        vente_2014,
        ventes_2015,
        ventes_2016,
        ventes_2017,
    ]
    return lreg.predict([feature_row])
but I get this error
ValueError at /
could not convert string to float: 'tea'
Request Method: POST
Request URL: http://127.0.0.1:8000/
Django Version: 2.2
Exception Type: ValueError
Exception Value:
could not convert string to float: 'tea'
Exception Location: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\lib\site-packages\sklearn\utils\validation.py in check_array, line 448
Python Executable: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\Scripts\python.exe
Python Version: 3.6.5
Python Path:
['C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\appweb pred',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts\\python36.zip',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\Lib',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib\\site-packages']
Server time: Sat, 27 Apr 2019 03:32:44 +0000
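I think this happens because I converted all the string variables of my model to integers for training, while the values entered by the user are still strings.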
Solution 1:[1]
Try popping out (removing) the column that contains the "Cafe" data, then run the code again. It should work.
# Remove the named column from the DataFrame in place (pop also returns the removed column).
df.pop("column_name")
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Developer-Felix |
