(Machine Learning) Unable to convert array of bytes/strings into decimal numbers with dtype='numeric'
I'm trying to do some machine learning to predict the outcome of UFC matches. I have a lot of data, and the error appears on line 48. I believe the problem is that the first 3 columns are of data type string, so they cannot be converted to numeric. I have tried label encoding, but that just prints more errors. Is one-hot encoding the way forward, and if so, how and where do I put it into my program? Any help is hugely appreciated.
# Script from the question, reproduced with its loop indentation restored.
# The failure the question asks about is intentionally left in place.
from sklearn import preprocessing
from sklearn.datasets import load_iris
data = load_iris()
features = data.data
targets = data.target
import pandas as pd
import numpy as np
data = pd.read_csv("completedata.csv")
data = np.array(data)
"encoder = preprocessing.LabelEncoder()"
"features[:, 0] = encoder.fit_transform(features[:, 0])"#LabelEncoding attempt
features = data[:, 0:41]
targets = data[:, 41]
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Everything assigned to `features` / `targets` above is discarded here:
# both names are rebuilt from the tab-separated training file.
fh = open("completedatatrain.txt", "r")
lines = fh.readlines()
fh.close()
features = []
targets = []
for line in lines:
    line = line.strip()
    line = line.split("\t")
    # Fields 0..40 are the features, field 41 is the target.
    features.append(line[0:41])
    targets.append(line[41])
# Every field is still a str at this point, so these are string arrays.
features = np.array(features)
targets = np.array(targets)
featuresTrain, featuresTest, targetsTrain, targetsTest = train_test_split(features, targets, test_size=0.2)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(featuresTrain, targetsTrain) #The error prints at here!
predictions = knn.predict(featuresTest)
print(predictions)
accuracy = accuracy_score(targetsTest, predictions)
print(accuracy)
# Hold-out rows: each line is used whole as one feature row.
fh = open("testingexample1.txt", "r")
lines = fh.readlines()
fh.close()
features = []
for line in lines:
    line = line.strip()
    line = line.split("\t")
    features.append(line)
holdBack = np.array(features)
predictions = knn.predict(holdBack)
Solution 1:[1]
You have to label-encode the fighters, weight_class, and method columns, since these columns contain strings.
Try it and let me know if it works.
# Train a k-nearest-neighbours classifier on tab-separated fight data whose
# string-valued columns are label-encoded first, then predict hold-out rows.
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Read the training file: fields 0..40 of each row are the features and
# field 41 is the target.  Blank lines are skipped so a trailing newline
# does not raise IndexError on row[41].
with open("completedatatrain.txt", "r") as fh:
    rows = [line.strip().split("\t") for line in fh if line.strip()]

# dtype=object keeps each cell as its own Python object, so writing integer
# codes into a column cannot be truncated by a fixed-width string dtype.
features = np.array([row[0:41] for row in rows], dtype=object)
targets = np.array([row[41] for row in rows])

# Here is the added code
# Indices of the columns that hold strings.  The question states these are
# the first three columns -- adjust the list to match your data.
categorical = [0, 1, 2]

# Label-encode your categorical columns.  One fitted encoder is kept per
# column so the exact same mapping can be applied to the hold-out rows.
encoders = {}
for col in categorical:
    le = preprocessing.LabelEncoder()
    features[:, col] = le.fit_transform(features[:, col].astype(str))
    encoders[col] = le

# The remaining cells are still numeric text read from the file; the
# estimator needs a numeric matrix, so convert everything in one step.
features = features.astype(float)

featuresTrain, featuresTest, targetsTrain, targetsTest = train_test_split(features, targets, test_size=0.2)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(featuresTrain, targetsTrain)
predictions = knn.predict(featuresTest)
print(predictions)
accuracy = accuracy_score(targetsTest, predictions)
print(accuracy)

# Hold-out rows: each line is used whole as one feature row, so it must
# contain the same columns, in the same order, as the training features.
with open("testingexample1.txt", "r") as fh:
    holdBack = np.array(
        [line.strip().split("\t") for line in fh if line.strip()],
        dtype=object,
    )

# Re-use the encoders fitted on the training data.  transform() raises
# ValueError for a label that never appeared in the training file.
for col, le in encoders.items():
    holdBack[:, col] = le.transform(holdBack[:, col].astype(str))
holdBack = holdBack.astype(float)
predictions = knn.predict(holdBack)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Kyriakos |
